Merge branch 'main' of github.com:lunary-ai/litellm

Vince Loewe committed 2024-02-28 22:18:43 -08:00
commit ab415d5165
68 changed files with 2676 additions and 1126 deletions


@@ -130,6 +130,7 @@ jobs:
          pip install "langfuse>=2.0.0"
          pip install numpydoc
          pip install prisma
+         pip install fastapi
          pip install "httpx==0.24.1"
          pip install "gunicorn==21.2.0"
          pip install "anyio==3.7.1"


@@ -1,18 +1,25 @@
# Function Calling
-Function calling is supported with the following models on OpenAI, Azure OpenAI
-- gpt-4
-- gpt-4-1106-preview
-- gpt-4-0613
-- gpt-3.5-turbo
-- gpt-3.5-turbo-1106
-- gpt-3.5-turbo-0613
-- Non OpenAI LLMs (litellm adds the function call to the prompt for these llms)
-In addition, parallel function calls is supported on the following models:
-- gpt-4-1106-preview
-- gpt-3.5-turbo-1106
+
+## Checking if a model supports function calling
+Use `litellm.supports_function_calling(model="")` -> returns `True` if the model supports function calling, `False` if not
+```python
+assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
+assert litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
+assert litellm.supports_function_calling(model="palm/chat-bison") == False
+assert litellm.supports_function_calling(model="ollama/llama2") == False
+```
+
+## Checking if a model supports parallel function calling
+Use `litellm.supports_parallel_function_calling(model="")` -> returns `True` if the model supports parallel function calling, `False` if not
+```python
+assert litellm.supports_parallel_function_calling(model="gpt-4-turbo-preview") == True
+assert litellm.supports_parallel_function_calling(model="gpt-4") == False
+```
+
## Parallel Function calling
Parallel function calling is the model's ability to perform multiple function calls together, allowing the effects and results of these function calls to be resolved in parallel
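For context (not part of this diff), here is a minimal sketch of how the two helpers above can gate a tools-based call; the `get_current_weather` tool definition and the prompt are illustrative assumptions only:

```python
import litellm

# hypothetical tool definition, for illustration only
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }
]

model = "gpt-3.5-turbo-1106"
messages = [{"role": "user", "content": "What's the weather in Boston and in Paris?"}]

if litellm.supports_function_calling(model=model):
    response = litellm.completion(model=model, messages=messages, tools=tools)
    tool_calls = response.choices[0].message.tool_calls or []
    # models that support parallel function calling may return several tool_calls at once
    if litellm.supports_parallel_function_calling(model=model):
        print(f"model returned {len(tool_calls)} tool call(s)")
```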


@@ -291,7 +291,6 @@ Here's an example of using a bedrock model with LiteLLM
| Anthropic Claude-V2.1 | `completion(model='bedrock/anthropic.claude-v2:1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
| Anthropic Claude-V2 | `completion(model='bedrock/anthropic.claude-v2', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
| Anthropic Claude-Instant V1 | `completion(model='bedrock/anthropic.claude-instant-v1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
-| Anthropic Claude-V1 | `completion(model='bedrock/anthropic.claude-v1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
| Amazon Titan Lite | `completion(model='bedrock/amazon.titan-text-lite-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| Amazon Titan Express | `completion(model='bedrock/amazon.titan-text-express-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| Cohere Command | `completion(model='bedrock/cohere.command-text-v14', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |


@@ -0,0 +1,44 @@
# 💸 GET Daily Spend, Usage Metrics

## Request Format
```shell
curl -X GET "http://0.0.0.0:4000/daily_metrics" -H "Authorization: Bearer sk-1234"
```

## Response Format
```json
{
  "daily_spend": [
    {
      "daily_spend": 7.9261938052047e+16,
      "day": "2024-02-01T00:00:00",
      "spend_per_model": {"azure/gpt-4": 7.9261938052047e+16},
      "spend_per_api_key": {
        "76": 914495704992000.0,
        "12": 905726697912000.0,
        "71": 866312628003000.0,
        "28": 865461799332000.0,
        "13": 859151538396000.0
      }
    },
    {
      "daily_spend": 7.938489251309491e+16,
      "day": "2024-02-02T00:00:00",
      "spend_per_model": {"gpt-3.5": 7.938489251309491e+16},
      "spend_per_api_key": {
        "91": 896805036036000.0,
        "78": 889692646082000.0,
        "49": 885386687861000.0,
        "28": 873869890984000.0,
        "56": 867398637692000.0
      }
    }
  ],
  "total_spend": 200,
  "top_models": {"gpt4": 0.2, "vertexai/gemini-pro": 10},
  "top_api_keys": {"899922": 0.9, "838hcjd999seerr88": 20}
}
```
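A client-side sketch (not part of this commit) for calling the endpoint above; it assumes the proxy is reachable at `http://0.0.0.0:4000`, that `sk-1234` is a valid key, and that the `requests` package is installed:

```python
import requests

resp = requests.get(
    "http://0.0.0.0:4000/daily_metrics",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()
metrics = resp.json()

print("total spend:", metrics["total_spend"])
for day in metrics["daily_spend"]:
    print(day["day"], day["daily_spend"], day["spend_per_model"])
```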


@@ -186,6 +186,20 @@ If you don't see all your keys this could be due to a cached token. So just re-l
:::

+### Restrict UI Access
+
+You can restrict UI access to admins only - this includes you (proxy_admin) and anyone you give view-only access to (proxy_admin_viewer) for seeing global spend.
+
+**Step 1. Set 'admin_only' access**
+
+```yaml
+general_settings:
+  ui_access_mode: "admin_only"
+```
+
+**Step 2. Invite view-only users**
+
+<Image img={require('../../img/admin_ui_viewer.png')} />
+
### Custom Branding Admin UI
Use your company's custom branding on the LiteLLM Admin UI

Binary image file added (131 KiB, not shown).


@@ -40,6 +40,7 @@ const sidebars = {
        "proxy/virtual_keys",
        "proxy/users",
        "proxy/ui",
+       "proxy/metrics",
        "proxy/model_management",
        "proxy/health",
        "proxy/debugging",


@@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
        "log_count": num_rows,
    }
    return response_data

def _create_clickhouse_material_views(client=None, table_names=[]):
# Create Materialized Views if they don't exist
# Materialized Views send new inserted rows to the aggregate tables
verbose_logger.debug("Clickhouse: Creating Materialized Views")
if "daily_aggregated_spend_per_model_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
TO daily_aggregated_spend_per_model
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
model as model
FROM spend_logs
GROUP BY
day, model
"""
)
if "daily_aggregated_spend_per_api_key_mv" not in table_names:
verbose_logger.debug(
"Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
)
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
TO daily_aggregated_spend_per_api_key
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
api_key as api_key
FROM spend_logs
GROUP BY
day, api_key
"""
)
if "daily_aggregated_spend_per_user_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
TO daily_aggregated_spend_per_user
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
user as user
FROM spend_logs
GROUP BY
day, user
"""
)
if "daily_aggregated_spend_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
TO daily_aggregated_spend
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend
FROM spend_logs
GROUP BY
day
"""
)
def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
# Basic Logging works without this - this is only used for low latency reporting apis
verbose_logger.debug("Clickhouse: Creating Aggregate Tables")
    # Create Aggregate Tables if they don't exist
if "daily_aggregated_spend_per_model" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_model
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`model` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, model);
"""
)
if "daily_aggregated_spend_per_api_key" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_api_key
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`api_key` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, api_key);
"""
)
if "daily_aggregated_spend_per_user" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_user
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`user` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, user);
"""
)
if "daily_aggregated_spend" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
client.command(
"""
CREATE TABLE daily_aggregated_spend
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
)
ENGINE = SummingMergeTree()
ORDER BY (day);
"""
)
return
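A rough usage sketch (not part of this diff) showing how the two helpers above can be wired together, mirroring the `_start_clickhouse` change later in this commit; the connection details are placeholders:

```python
import clickhouse_connect

# placeholder connection details - adjust for your deployment
client = clickhouse_connect.get_client(
    host="localhost", port=8123, username="default", password=""
)

# list existing tables, then create only the missing aggregate tables / materialized views
table_names = [row[0] for row in client.query("SHOW TABLES").result_rows]
_create_clickhouse_aggregate_tables(client=client, table_names=table_names)
_create_clickhouse_material_views(client=client, table_names=table_names)
```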


@@ -549,6 +549,8 @@ from .utils import (
    token_counter,
    cost_per_token,
    completion_cost,
+   supports_function_calling,
+   supports_parallel_function_calling,
    get_litellm_params,
    Logging,
    acreate,


@@ -27,6 +27,151 @@ import litellm, uuid
from litellm._logging import print_verbose, verbose_logger

def create_client():
try:
import clickhouse_connect
port = os.getenv("CLICKHOUSE_PORT")
clickhouse_host = os.getenv("CLICKHOUSE_HOST")
if clickhouse_host is not None:
verbose_logger.debug("setting up clickhouse")
if port is not None and isinstance(port, str):
port = int(port)
client = clickhouse_connect.get_client(
host=os.getenv("CLICKHOUSE_HOST"),
port=port,
username=os.getenv("CLICKHOUSE_USERNAME"),
password=os.getenv("CLICKHOUSE_PASSWORD"),
)
return client
else:
raise Exception("Clickhouse: Clickhouse host not set")
except Exception as e:
raise ValueError(f"Clickhouse: {e}")
def build_daily_metrics():
click_house_client = create_client()
# get daily spend
daily_spend = click_house_client.query_df(
"""
SELECT sumMerge(DailySpend) as daily_spend, day FROM daily_aggregated_spend GROUP BY day
"""
)
# get daily spend per model
daily_spend_per_model = click_house_client.query_df(
"""
SELECT sumMerge(DailySpend) as daily_spend, day, model FROM daily_aggregated_spend_per_model GROUP BY day, model
"""
)
new_df = daily_spend_per_model.to_dict(orient="records")
import pandas as pd
df = pd.DataFrame(new_df)
# Group by 'day' and create a dictionary for each group
result_dict = {}
for day, group in df.groupby("day"):
models = group["model"].tolist()
spend = group["daily_spend"].tolist()
spend_per_model = {model: spend for model, spend in zip(models, spend)}
result_dict[day] = spend_per_model
# Display the resulting dictionary
# get daily spend per API key
daily_spend_per_api_key = click_house_client.query_df(
"""
SELECT
daily_spend,
day,
api_key
FROM (
SELECT
sumMerge(DailySpend) as daily_spend,
day,
api_key,
RANK() OVER (PARTITION BY day ORDER BY sumMerge(DailySpend) DESC) as spend_rank
FROM
daily_aggregated_spend_per_api_key
GROUP BY
day,
api_key
) AS ranked_api_keys
WHERE
spend_rank <= 5
AND day IS NOT NULL
ORDER BY
day,
daily_spend DESC
"""
)
new_df = daily_spend_per_api_key.to_dict(orient="records")
import pandas as pd
df = pd.DataFrame(new_df)
# Group by 'day' and create a dictionary for each group
api_key_result_dict = {}
for day, group in df.groupby("day"):
api_keys = group["api_key"].tolist()
spend = group["daily_spend"].tolist()
spend_per_api_key = {api_key: spend for api_key, spend in zip(api_keys, spend)}
api_key_result_dict[day] = spend_per_api_key
# Display the resulting dictionary
# Calculate total spend across all days
total_spend = daily_spend["daily_spend"].sum()
# Identify top models and top API keys with the highest spend across all days
top_models = {}
top_api_keys = {}
for day, spend_per_model in result_dict.items():
for model, model_spend in spend_per_model.items():
if model not in top_models or model_spend > top_models[model]:
top_models[model] = model_spend
for day, spend_per_api_key in api_key_result_dict.items():
for api_key, api_key_spend in spend_per_api_key.items():
if api_key not in top_api_keys or api_key_spend > top_api_keys[api_key]:
top_api_keys[api_key] = api_key_spend
# for each day in daily spend, look up the day in result_dict and api_key_result_dict
# Assuming daily_spend DataFrame has 'day' column
result = []
for index, row in daily_spend.iterrows():
day = row["day"]
data_day = row.to_dict()
# Look up in result_dict
if day in result_dict:
spend_per_model = result_dict[day]
# Assuming there is a column named 'model' in daily_spend
data_day["spend_per_model"] = spend_per_model # Assign 0 if model not found
# Look up in api_key_result_dict
if day in api_key_result_dict:
spend_per_api_key = api_key_result_dict[day]
# Assuming there is a column named 'api_key' in daily_spend
data_day["spend_per_api_key"] = spend_per_api_key
result.append(data_day)
data_to_return = {}
data_to_return["daily_spend"] = result
data_to_return["total_spend"] = total_spend
data_to_return["top_models"] = top_models
data_to_return["top_api_keys"] = top_api_keys
return data_to_return
# build_daily_metrics()
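For reference (not part of the file), a hedged sketch of calling `build_daily_metrics()` once the `CLICKHOUSE_*` environment variables are set; the values below are placeholders:

```python
import os

# placeholder connection settings - adjust for your deployment
os.environ["CLICKHOUSE_HOST"] = "localhost"
os.environ["CLICKHOUSE_PORT"] = "8123"
os.environ["CLICKHOUSE_USERNAME"] = "default"
os.environ["CLICKHOUSE_PASSWORD"] = ""

metrics = build_daily_metrics()
print(metrics["total_spend"], metrics["top_models"], metrics["top_api_keys"])
```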
def _start_clickhouse():
    import clickhouse_connect

@@ -86,6 +231,14 @@ def _start_clickhouse():
    response = client.query("DESCRIBE default.spend_logs")
    verbose_logger.debug(f"spend logs schema ={response.result_rows}")

+   # RUN Enterprise Clickhouse Setup
+   # TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
+   from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
+   from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
+
+   _create_clickhouse_aggregate_tables(client=client, table_names=table_names)
+   _create_clickhouse_material_views(client=client, table_names=table_names)


class ClickhouseLogger:
    # Class variables or attributes


@@ -278,7 +278,11 @@ def completion(
            import google.auth

            ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
+           print_verbose(
+               f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
+           )
            creds, _ = google.auth.default(quota_project_id=vertex_project)
+           print_verbose(f"VERTEX AI: creds={creds}")
            vertexai.init(
                project=vertex_project, location=vertex_location, credentials=creds
            )


@@ -10,7 +10,6 @@
import os, openai, sys, json, inspect, uuid, datetime, threading
from typing import Any, Literal, Union
from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx

@@ -1468,12 +1467,14 @@ def completion(
            response = model_response
        elif custom_llm_provider == "vertex_ai":
            vertex_ai_project = (
-               optional_params.pop("vertex_ai_project", None)
+               optional_params.pop("vertex_project", None)
+               or optional_params.pop("vertex_ai_project", None)
                or litellm.vertex_project
                or get_secret("VERTEXAI_PROJECT")
            )
            vertex_ai_location = (
-               optional_params.pop("vertex_ai_location", None)
+               optional_params.pop("vertex_location", None)
+               or optional_params.pop("vertex_ai_location", None)
                or litellm.vertex_location
                or get_secret("VERTEXAI_LOCATION")
            )

@@ -2567,12 +2568,14 @@ def embedding(
        )
    elif custom_llm_provider == "vertex_ai":
        vertex_ai_project = (
-           optional_params.pop("vertex_ai_project", None)
+           optional_params.pop("vertex_project", None)
+           or optional_params.pop("vertex_ai_project", None)
            or litellm.vertex_project
            or get_secret("VERTEXAI_PROJECT")
        )
        vertex_ai_location = (
-           optional_params.pop("vertex_ai_location", None)
+           optional_params.pop("vertex_location", None)
+           or optional_params.pop("vertex_ai_location", None)
            or litellm.vertex_location
            or get_secret("VERTEXAI_LOCATION")
        )
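With this change, both the `vertex_project` / `vertex_location` kwargs and the older `vertex_ai_project` / `vertex_ai_location` spellings are popped from `optional_params`. A hedged usage sketch (the project and location values are placeholders):

```python
import litellm

# either kwarg spelling should now reach vertexai.init()
response = litellm.completion(
    model="vertex_ai/gemini-pro",
    messages=[{"role": "user", "content": "hi"}],
    vertex_project="my-gcp-project",  # placeholder
    vertex_location="us-central1",    # placeholder
)
print(response.choices[0].message.content)
```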


@@ -6,7 +6,8 @@
        "input_cost_per_token": 0.00003,
        "output_cost_per_token": 0.00006,
        "litellm_provider": "openai",
        "mode": "chat",
+       "supports_function_calling": true
    },
    "gpt-4-turbo-preview": {
        "max_tokens": 8192,

@@ -15,7 +16,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "openai",
        "mode": "chat",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
    },
    "gpt-4-0314": {
        "max_tokens": 8192,

@@ -33,7 +36,8 @@
        "input_cost_per_token": 0.00003,
        "output_cost_per_token": 0.00006,
        "litellm_provider": "openai",
        "mode": "chat",
+       "supports_function_calling": true
    },
    "gpt-4-32k": {
        "max_tokens": 32768,

@@ -69,7 +73,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "openai",
        "mode": "chat",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
    },
    "gpt-4-0125-preview": {
        "max_tokens": 128000,

@@ -78,7 +84,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "openai",
        "mode": "chat",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
    },
    "gpt-4-vision-preview": {
        "max_tokens": 128000,

@@ -105,7 +113,8 @@
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
        "litellm_provider": "openai",
        "mode": "chat",
+       "supports_function_calling": true
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4097,

@@ -123,7 +132,8 @@
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
        "litellm_provider": "openai",
        "mode": "chat",
+       "supports_function_calling": true
    },
    "gpt-3.5-turbo-1106": {
        "max_tokens": 16385,

@@ -132,7 +142,9 @@
        "input_cost_per_token": 0.0000010,
        "output_cost_per_token": 0.0000020,
        "litellm_provider": "openai",
        "mode": "chat",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
    },
    "gpt-3.5-turbo-0125": {
        "max_tokens": 16385,

@@ -141,7 +153,9 @@
        "input_cost_per_token": 0.0000005,
        "output_cost_per_token": 0.0000015,
        "litellm_provider": "openai",
        "mode": "chat",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16385,

@@ -286,7 +300,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "azure",
        "mode": "chat",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
    },
    "azure/gpt-4-1106-preview": {
        "max_tokens": 128000,

@@ -295,7 +311,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "azure",
        "mode": "chat",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
    },
    "azure/gpt-4-0613": {
        "max_tokens": 8192,

@@ -304,7 +322,8 @@
        "input_cost_per_token": 0.00003,
        "output_cost_per_token": 0.00006,
        "litellm_provider": "azure",
        "mode": "chat",
+       "supports_function_calling": true
    },
    "azure/gpt-4-32k-0613": {
        "max_tokens": 32768,

@@ -331,7 +350,8 @@
        "input_cost_per_token": 0.00003,
        "output_cost_per_token": 0.00006,
        "litellm_provider": "azure",
        "mode": "chat",
+       "supports_function_calling": true
    },
    "azure/gpt-4-turbo": {
        "max_tokens": 128000,

@@ -340,7 +360,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "azure",
        "mode": "chat",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
    },
    "azure/gpt-4-turbo-vision-preview": {
        "max_tokens": 128000,

@@ -358,7 +380,8 @@
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
        "litellm_provider": "azure",
        "mode": "chat",
+       "supports_function_calling": true
    },
    "azure/gpt-35-turbo-1106": {
        "max_tokens": 16384,

@@ -367,7 +390,20 @@
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
        "litellm_provider": "azure",
        "mode": "chat",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
+   },
+   "azure/gpt-35-turbo-0125": {
+       "max_tokens": 16384,
+       "max_input_tokens": 16384,
+       "max_output_tokens": 4096,
+       "input_cost_per_token": 0.0000005,
+       "output_cost_per_token": 0.0000015,
+       "litellm_provider": "azure",
+       "mode": "chat",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
    },
    "azure/gpt-35-turbo-16k": {
        "max_tokens": 16385,

@@ -385,7 +421,8 @@
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
        "litellm_provider": "azure",
        "mode": "chat",
+       "supports_function_calling": true
    },
    "azure/ada": {
        "max_tokens": 8191,

@@ -514,11 +551,12 @@
        "mode": "chat"
    },
    "mistral/mistral-large-latest": {
-       "max_tokens": 8192,
+       "max_tokens": 32000,
        "input_cost_per_token": 0.000008,
        "output_cost_per_token": 0.000024,
        "litellm_provider": "mistral",
        "mode": "chat",
+       "supports_function_calling": true
    },
    "mistral/mistral-embed": {
        "max_tokens": 8192,

@@ -676,7 +714,8 @@
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat",
+       "supports_function_calling": true
    },
    "gemini-1.5-pro": {
        "max_tokens": 8192,

@@ -1738,6 +1777,23 @@
        "output_cost_per_token": 0.0000009,
        "litellm_provider": "together_ai"
    },
+   "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
+       "input_cost_per_token": 0.0000006,
+       "output_cost_per_token": 0.0000006,
+       "litellm_provider": "together_ai",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
+   },
+   "together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
+       "litellm_provider": "together_ai",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
+   },
+   "together_ai/togethercomputer/CodeLlama-34b-Instruct": {
+       "litellm_provider": "together_ai",
+       "supports_function_calling": true,
+       "supports_parallel_function_calling": true
+   },
    "ollama/llama2": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.0,

@@ -1990,7 +2046,16 @@
        "input_cost_per_token": 0.00000015,
        "output_cost_per_token": 0.00000015,
        "litellm_provider": "anyscale",
        "mode": "chat",
+       "supports_function_calling": true
+   },
+   "anyscale/Mixtral-8x7B-Instruct-v0.1": {
+       "max_tokens": 16384,
+       "input_cost_per_token": 0.00000015,
+       "output_cost_per_token": 0.00000015,
+       "litellm_provider": "anyscale",
+       "mode": "chat",
+       "supports_function_calling": true
    },
    "anyscale/HuggingFaceH4/zephyr-7b-beta": {
        "max_tokens": 16384,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/6920a121699cde9c.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}(); !function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/a40ad0909dd7838e.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var 
o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a697f24d60c9c262.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a697f24d60c9c262.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/6920a121699cde9c.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[24143,[\"303\",\"static/chunks/303-d80f23087a9e6aec.js\",\"931\",\"static/chunks/app/page-d4fe4a48cbd3572c.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/6920a121699cde9c.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"eSwVwl_InIrhYtCAqDMKF\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[30280,[\"303\",\"static/chunks/303-d80f23087a9e6aec.js\",\"931\",\"static/chunks/app/page-8f65fc157f538dff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kyOCJPBB9pyUfbMKCAXr-\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin 
UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>


@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[24143,["303","static/chunks/303-d80f23087a9e6aec.js","931","static/chunks/app/page-d4fe4a48cbd3572c.js"],""]
+3:I[30280,["303","static/chunks/303-d80f23087a9e6aec.js","931","static/chunks/app/page-8f65fc157f538dff.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["eSwVwl_InIrhYtCAqDMKF",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/6920a121699cde9c.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["kyOCJPBB9pyUfbMKCAXr-",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a40ad0909dd7838e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null


@@ -221,12 +221,19 @@ class NewUserResponse(GenerateKeyResponse):
class UpdateUserRequest(GenerateRequestBase):
    # Note: the defaults of all Params here MUST BE NONE
    # else they will get overwritten
-   user_id: str
+   user_id: Optional[str] = None
+   user_email: Optional[str] = None
    spend: Optional[float] = None
    metadata: Optional[dict] = None
    user_role: Optional[str] = None
    max_budget: Optional[float] = None

+   @root_validator(pre=True)
+   def check_user_info(cls, values):
+       if values.get("user_id") is None and values.get("user_email") is None:
+           raise ValueError("Either user id or user email must be provided")
+       return values


class Member(LiteLLMBase):
    role: Literal["admin", "user"]

@@ -402,6 +409,9 @@ class ConfigGeneralSettings(LiteLLMBase):
        None,
        description="sends alerts if requests hang for 5min+",
    )
+   ui_access_mode: Optional[Literal["admin_only", "all"]] = Field(
+       "all", description="Control access to the Proxy UI"
+   )


class ConfigYAML(LiteLLMBase):
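A quick sketch (illustrative only) of how the new `UpdateUserRequest` validator behaves, assuming the model is imported from `litellm.proxy._types` and using a placeholder email:

```python
from litellm.proxy._types import UpdateUserRequest

# user_email alone is now enough to identify the user
req = UpdateUserRequest(user_email="user@example.com", max_budget=50.0)

# omitting both identifiers should trigger the new root validator
try:
    UpdateUserRequest(max_budget=50.0)
except ValueError as e:
    print(e)  # "Either user id or user email must be provided"
```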


@@ -0,0 +1,66 @@
from litellm.integrations.custom_logger import CustomLogger
import litellm
# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):
def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call") # noqa
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
print(f"Post-API Call") # noqa
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream") # noqa
def log_success_event(self, kwargs, response_obj, start_time, end_time):
print("On Success") # noqa
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Failure") # noqa
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"ishaan async_log_success_event") # noqa
# log: key, user, model, prompt, response, tokens, cost
# Access kwargs passed to litellm.completion()
model = kwargs.get("model", None)
messages = kwargs.get("messages", None)
user = kwargs.get("user", None)
# Access litellm_params passed to litellm.completion(), example access `metadata`
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get(
"metadata", {}
) # headers passed to LiteLLM proxy, can be found here
return
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
try:
print(f"On Async Failure !") # noqa
print("\nkwargs", kwargs) # noqa
# Access kwargs passed to litellm.completion()
model = kwargs.get("model", None)
messages = kwargs.get("messages", None)
user = kwargs.get("user", None)
# Access litellm_params passed to litellm.completion(), example access `metadata`
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get(
"metadata", {}
) # headers passed to LiteLLM proxy, can be found here
            # Access Exceptions & Traceback
exception_event = kwargs.get("exception", None)
traceback_event = kwargs.get("traceback_exception", None)
# Calculate cost using litellm.completion_cost()
except Exception as e:
print(f"Exception: {e}") # noqa
proxy_handler_instance = MyCustomHandler()
# Set litellm.callbacks = [proxy_handler_instance] on the proxy
# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy


@@ -45,7 +45,7 @@ litellm_settings:
  fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
  success_callback: ['langfuse']
  # setting callback class
- # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
+ callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]

general_settings:
  master_key: sk-1234


@@ -239,6 +239,9 @@ health_check_interval = None
health_check_results = {}
queue: List = []
litellm_proxy_budget_name = "litellm-proxy-budget"
+ui_access_mode: Literal["admin", "all"] = "all"
+proxy_budget_rescheduler_min_time = 597
+proxy_budget_rescheduler_max_time = 605
### INITIALIZE GLOBAL LOGGING OBJECT ###
proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
### REDIS QUEUE ###

@@ -1406,7 +1409,7 @@ class ProxyConfig:
        """
        Load config values into proxy global state
        """
-       global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client
+       global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time, ui_access_mode
        # Load existing config
        config = await self.get_config(config_file_path=config_file_path)

@@ -1713,6 +1716,17 @@ class ProxyConfig:
        )
        ## COST TRACKING ##
        cost_tracking()
+       ## ADMIN UI ACCESS ##
+       ui_access_mode = general_settings.get(
+           "ui_access_mode", "all"
+       )  # can be either ["admin_only" or "all"]
+       ## BUDGET RESCHEDULER ##
+       proxy_budget_rescheduler_min_time = general_settings.get(
+           "proxy_budget_rescheduler_min_time", proxy_budget_rescheduler_min_time
+       )
+       proxy_budget_rescheduler_max_time = general_settings.get(
+           "proxy_budget_rescheduler_max_time", proxy_budget_rescheduler_max_time
+       )
        ### BACKGROUND HEALTH CHECKS ###
        # Enable background health checks
        use_background_health_checks = general_settings.get(

@@ -2115,10 +2129,9 @@ async def async_data_generator(response, user_api_key_dict):
    try:
        start_time = time.time()
        async for chunk in response:
-           verbose_proxy_logger.debug(f"returned chunk: {chunk}")
-           assert isinstance(chunk, litellm.ModelResponse)
+           chunk = chunk.model_dump_json(exclude_none=True)
            try:
-               yield f"data: {json.dumps(chunk.model_dump(exclude_none=True))}\n\n"
+               yield f"data: {chunk}\n\n"
            except Exception as e:
                yield f"data: {str(e)}\n\n"

@@ -2197,7 +2210,7 @@ def parse_cache_control(cache_control):
@router.on_event("startup")
async def startup_event():
-   global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings
+   global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
    import json

    ### LOAD MASTER KEY ###

@@ -2302,13 +2315,12 @@ async def startup_event():
    ### CHECK IF VIEW EXISTS ###
    if prisma_client is not None:
        create_view_response = await prisma_client.check_view_exists()
-       print(f"create_view_response: {create_view_response}")  # noqa
    ### START BUDGET SCHEDULER ###
    if prisma_client is not None:
        scheduler = AsyncIOScheduler()
        interval = random.randint(
-           597, 605
+           proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
        )  # random interval, so multiple workers avoid resetting budget at the same time
        scheduler.add_job(
            reset_budget, "interval", seconds=interval, args=[prisma_client]

@@ -3775,7 +3787,7 @@ async def view_spend_tags(
@router.get(
    "/spend/logs",
-   tags=["budget & spend Tracking"],
+   tags=["Budget & Spend Tracking"],
    dependencies=[Depends(user_api_key_auth)],
    responses={
        200: {"model": List[LiteLLM_SpendLogs]},
@ -3834,13 +3846,55 @@ async def view_spend_logs(
# getting spend logs from clickhouse # getting spend logs from clickhouse
from litellm.proxy.enterprise.utils import view_spend_logs_from_clickhouse from litellm.proxy.enterprise.utils import view_spend_logs_from_clickhouse
return await view_spend_logs_from_clickhouse( daily_metrics = await view_daily_metrics(
api_key=api_key,
user_id=user_id,
request_id=request_id,
start_date=start_date, start_date=start_date,
end_date=end_date, end_date=end_date,
) )
# get the top api keys across all daily_metrics
top_api_keys = {} # type: ignore
# make this compatible with the admin UI
for response in daily_metrics.get("daily_spend", {}):
response["startTime"] = response["day"]
response["spend"] = response["daily_spend"]
response["models"] = response["spend_per_model"]
response["users"] = {"ishaan": 0.0}
spend_per_api_key = response["spend_per_api_key"]
# insert spend_per_api_key key, values in response
for key, value in spend_per_api_key.items():
response[key] = value
top_api_keys[key] = top_api_keys.get(key, 0.0) + value
del response["day"]
del response["daily_spend"]
del response["spend_per_model"]
del response["spend_per_api_key"]
# get top 5 api keys
top_api_keys = sorted(top_api_keys.items(), key=lambda x: x[1], reverse=True) # type: ignore
top_api_keys = top_api_keys[:5] # type: ignore
top_api_keys = dict(top_api_keys) # type: ignore
"""
set it like this
{
"key" : key,
"spend:" : spend
}
"""
# we need this to show on the Admin UI
response_keys = []
for key in top_api_keys.items():
response_keys.append(
{
"key": key[0],
"spend": key[1],
}
)
daily_metrics["top_api_keys"] = response_keys
return daily_metrics
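# For reference, an illustrative sketch of the reshaped payload the admin UI receives
# after the loop above (spend values are made up; "<api-key>" stands in for a hashed key):
#
#   {
#     "daily_spend": [
#       {"startTime": "2024-02-27", "spend": 12.4,
#        "models": {"gpt-3.5-turbo": 9.1}, "users": {...}, "<api-key>": 3.3},
#     ],
#     "top_api_keys": [{"key": "<api-key>", "spend": 3.3}],
#   }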
global prisma_client global prisma_client
try: try:
verbose_proxy_logger.debug("inside view_spend_logs") verbose_proxy_logger.debug("inside view_spend_logs")
@ -3993,6 +4047,142 @@ async def view_spend_logs(
) )
@router.get(
"/global/spend/logs",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_logs():
"""
[BETA] This is a beta endpoint. It will change.
Use this to get global spend (spend per day for last 30d). Admin-only endpoint
More efficient implementation of /spend/logs, by creating a view over the spend logs table.
"""
global prisma_client
sql_query = """SELECT * FROM "MonthlyGlobalSpend";"""
response = await prisma_client.db.query_raw(query=sql_query)
return response
@router.get(
"/global/spend/keys",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_keys(
limit: int = fastapi.Query(
default=None,
description="Number of keys to get. Will return Top 'n' keys.",
)
):
"""
[BETA] This is a beta endpoint. It will change.
Use this to get the top 'n' keys with the highest spend, ordered by spend.
"""
global prisma_client
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
sql_query = f"""SELECT * FROM "Last30dKeysBySpend" LIMIT {limit};"""
response = await prisma_client.db.query_raw(query=sql_query)
return response
@router.get(
"/global/spend/models",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_models(
limit: int = fastapi.Query(
default=None,
description="Number of models to get. Will return Top 'n' models.",
)
):
"""
[BETA] This is a beta endpoint. It will change.
Use this to get the top 'n' models with the highest spend, ordered by spend.
"""
global prisma_client
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
sql_query = f"""SELECT * FROM "Last30dModelsBySpend" LIMIT {limit};"""
response = await prisma_client.db.query_raw(query=sql_query)
return response
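# A minimal usage sketch for the three admin spend endpoints above. Assumes the proxy
# runs at http://0.0.0.0:8000 with master key sk-1234; the `requests` client is used
# purely for illustration:
#
#   import requests
#
#   base = "http://0.0.0.0:8000"
#   headers = {"Authorization": "Bearer sk-1234"}
#   spend_per_day = requests.get(f"{base}/global/spend/logs", headers=headers).json()
#   top_keys = requests.get(f"{base}/global/spend/keys", params={"limit": 5}, headers=headers).json()
#   top_models = requests.get(f"{base}/global/spend/models", params={"limit": 5}, headers=headers).json()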
@router.get(
"/daily_metrics",
summary="Get daily spend metrics",
tags=["budget & spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def view_daily_metrics(
start_date: Optional[str] = fastapi.Query(
default=None,
description="Time from which to start viewing key spend",
),
end_date: Optional[str] = fastapi.Query(
default=None,
description="Time till which to view key spend",
),
):
"""
[BETA] This is a beta endpoint. It might change without notice.
Please give feedback - https://github.com/BerriAI/litellm/issues
"""
try:
if os.getenv("CLICKHOUSE_HOST") is not None:
# getting spend logs from clickhouse
from litellm.integrations import clickhouse
return clickhouse.build_daily_metrics()
# create a response object
"""
{
"date": "2022-01-01",
"spend": 0.0,
"users": {},
"models": {},
}
"""
else:
raise Exception(
"Clickhouse: Clickhouse host not set. Required for viewing /daily/metrics"
)
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"/spend/logs Error({str(e)})"),
type="internal_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="/spend/logs Error" + str(e),
type="internal_error",
param=getattr(e, "param", "None"),
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
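# Example call for the endpoint above (requires CLICKHOUSE_HOST to be set on the proxy,
# per the check in the try block; the dates, proxy URL and key are illustrative):
#
#   import requests
#
#   metrics = requests.get(
#       "http://0.0.0.0:8000/daily_metrics",
#       params={"start_date": "2024-02-01", "end_date": "2024-02-28"},
#       headers={"Authorization": "Bearer sk-1234"},
#   ).json()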
#### USER MANAGEMENT #### #### USER MANAGEMENT ####
@router.post( @router.post(
"/user/new", "/user/new",
@ -4264,12 +4454,32 @@ async def user_update(data: UpdateUserRequest):
): # models default to [], spend defaults to 0, we should not reset these values ): # models default to [], spend defaults to 0, we should not reset these values
non_default_values[k] = v non_default_values[k] = v
response = await prisma_client.update_data( ## ADD USER, IF NEW ##
user_id=data_json["user_id"], if data.user_id is not None and len(data.user_id) == 0:
data=non_default_values, non_default_values["user_id"] = data.user_id # type: ignore
update_key_values=non_default_values, await prisma_client.update_data(
) user_id=data.user_id,
return {"user_id": data_json["user_id"], **non_default_values} data=non_default_values,
table_name="user",
)
elif data.user_email is not None:
non_default_values["user_id"] = str(uuid.uuid4())
non_default_values["user_email"] = data.user_email
## user email is not unique acc. to prisma schema -> future improvement
### for now: check if it exists in db, if not - insert it
existing_user_row = await prisma_client.get_data(
key_val={"user_email": data.user_email},
table_name="user",
query_type="find_all",
)
if existing_user_row is None or (
isinstance(existing_user_row, list) and len(existing_user_row) == 0
):
await prisma_client.insert_data(
data=non_default_values, table_name="user"
)
return non_default_values
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()
@ -4472,6 +4682,40 @@ async def unblock_user(data: BlockUsers):
return {"blocked_users": litellm.blocked_user_list} return {"blocked_users": litellm.blocked_user_list}
@router.get(
"/user/get_users",
tags=["user management"],
dependencies=[Depends(user_api_key_auth)],
)
async def get_users(
role: str = fastapi.Query(
default=None,
description="Either 'proxy_admin', 'proxy_viewer', 'app_owner', 'app_user'",
)
):
"""
[BETA] This could change without notice. Give feedback - https://github.com/BerriAI/litellm/issues
Get all users who are a specific `user_role`.
Used by the UI to populate the user lists.
Currently - admin-only endpoint.
"""
global prisma_client
if prisma_client is None:
raise HTTPException(
status_code=500,
detail={"error": f"No db connected. prisma client={prisma_client}"},
)
all_users = await prisma_client.get_data(
table_name="user", query_type="find_all", key_val={"user_role": role}
)
return all_users
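# Example call for the endpoint above (role values per the query description; the proxy
# URL and key are illustrative):
#
#   import requests
#
#   admins = requests.get(
#       "http://0.0.0.0:8000/user/get_users",
#       params={"role": "proxy_admin"},
#       headers={"Authorization": "Bearer sk-1234"},
#   ).json()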
#### TEAM MANAGEMENT #### #### TEAM MANAGEMENT ####
@ -4621,9 +4865,9 @@ async def update_team(
): ):
""" """
[BETA] [BETA]
[DEPRECATED] - use the `/team/member_add` and `/team/member_remove` endpoints instead [RECOMMENDED] - use `/team/member_add` to add new team members instead
You can now add / delete users from a team via /team/update You can now update team budget / rate limits via /team/update
``` ```
curl --location 'http://0.0.0.0:8000/team/update' \ curl --location 'http://0.0.0.0:8000/team/update' \
@ -5620,7 +5864,7 @@ def get_image():
@app.get("/sso/callback", tags=["experimental"]) @app.get("/sso/callback", tags=["experimental"])
async def auth_callback(request: Request): async def auth_callback(request: Request):
"""Verify login""" """Verify login"""
global general_settings global general_settings, ui_access_mode
microsoft_client_id = os.getenv("MICROSOFT_CLIENT_ID", None) microsoft_client_id = os.getenv("MICROSOFT_CLIENT_ID", None)
google_client_id = os.getenv("GOOGLE_CLIENT_ID", None) google_client_id = os.getenv("GOOGLE_CLIENT_ID", None)
generic_client_id = os.getenv("GENERIC_CLIENT_ID", None) generic_client_id = os.getenv("GENERIC_CLIENT_ID", None)
@ -5811,6 +6055,7 @@ async def auth_callback(request: Request):
"user_email": user_email, "user_email": user_email,
} }
try: try:
user_role = None
if prisma_client is not None: if prisma_client is not None:
user_info = await prisma_client.get_data(user_id=user_id, table_name="user") user_info = await prisma_client.get_data(user_id=user_id, table_name="user")
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
@ -5822,6 +6067,7 @@ async def auth_callback(request: Request):
"user_id": getattr(user_info, "user_id", user_id), "user_id": getattr(user_info, "user_id", user_id),
"user_email": getattr(user_info, "user_id", user_email), "user_email": getattr(user_info, "user_id", user_email),
} }
user_role = getattr(user_info, "user_role", None)
elif litellm.default_user_params is not None and isinstance( elif litellm.default_user_params is not None and isinstance(
litellm.default_user_params, dict litellm.default_user_params, dict
): ):
@ -5844,13 +6090,27 @@ async def auth_callback(request: Request):
key = response["token"] # type: ignore key = response["token"] # type: ignore
user_id = response["user_id"] # type: ignore user_id = response["user_id"] # type: ignore
litellm_dashboard_ui = "/ui/" litellm_dashboard_ui = "/ui/"
user_role = "app_owner" user_role = user_role or "app_owner"
if ( if (
os.getenv("PROXY_ADMIN_ID", None) is not None os.getenv("PROXY_ADMIN_ID", None) is not None
and os.environ["PROXY_ADMIN_ID"] == user_id and os.environ["PROXY_ADMIN_ID"] == user_id
): ):
# checks if user is admin # checks if user is admin
user_role = "app_admin" user_role = "app_admin"
verbose_proxy_logger.debug(
f"user_role: {user_role}; ui_access_mode: {ui_access_mode}"
)
## CHECK IF ROLE ALLOWED TO USE PROXY ##
if ui_access_mode == "admin_only" and "admin" not in user_role:
verbose_proxy_logger.debug("EXCEPTION RAISED")
raise HTTPException(
status_code=401,
detail={
"error": f"User not allowed to access proxy. User role={user_role}, proxy mode={ui_access_mode}"
},
)
import jwt import jwt
jwt_token = jwt.encode( jwt_token = jwt.encode(
@ -489,18 +489,20 @@ class PrismaClient:
) )
async def check_view_exists(self): async def check_view_exists(self):
""" """
Checks if the LiteLLM_VerificationTokenView exists in the user's db. Checks if the LiteLLM_VerificationTokenView and MonthlyGlobalSpend exists in the user's db.
This is used for getting the token + team data in user_api_key_auth LiteLLM_VerificationTokenView: This view is used for getting the token + team data in user_api_key_auth
MonthlyGlobalSpend: This view is used for the admin view to see global spend for this month
If the view doesn't exist, one will be created. If the view doesn't exist, one will be created.
""" """
try: try:
# Try to select one row from the view # Try to select one row from the view
await self.db.execute_raw( await self.db.query_raw(
"""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""" """SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1"""
) )
return "LiteLLM_VerificationTokenView Exists!" print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e: except Exception as e:
# If an error occurs, the view does not exist, so create it # If an error occurs, the view does not exist, so create it
value = await self.health_check() value = await self.health_check()
@ -518,7 +520,29 @@ class PrismaClient:
""" """
) )
return "LiteLLM_VerificationTokenView Created!" print("LiteLLM_VerificationTokenView Created!") # noqa
try:
await self.db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT
DATE("startTime") AS date,
SUM("spend") AS spend
FROM
"LiteLLM_SpendLogs"
WHERE
"startTime" >= (CURRENT_DATE - INTERVAL '30 days')
GROUP BY
DATE("startTime");
"""
await self.db.execute_raw(query=sql_query)
print("MonthlyGlobalSpend Created!") # noqa
return
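# Once created, the view can be read with a plain raw query and returns one (date, spend)
# row per day for the trailing 30 days, e.g. (illustrative):
#
#   rows = await self.db.query_raw('SELECT date, spend FROM "MonthlyGlobalSpend" ORDER BY date')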
@backoff.on_exception( @backoff.on_exception(
backoff.expo, backoff.expo,
@ -1,253 +1,254 @@
import sys ## @pytest.mark.skip(reason="AWS Suspended Account")
import os # import sys
import io, asyncio # import os
# import io, asyncio
# import logging # # import logging
# logging.basicConfig(level=logging.DEBUG) # # logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, os.path.abspath("../..")) # sys.path.insert(0, os.path.abspath("../.."))
from litellm import completion # from litellm import completion
import litellm # import litellm
litellm.num_retries = 3 # litellm.num_retries = 3
import time, random # import time, random
import pytest # import pytest
def test_s3_logging(): # def test_s3_logging():
# all s3 requests need to be in one test function # # all s3 requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel # # since we are modifying stdout, and pytests runs tests in parallel
# on circle ci - we only test litellm.acompletion() # # on circle ci - we only test litellm.acompletion()
try: # try:
# redirect stdout to log_file # # redirect stdout to log_file
litellm.cache = litellm.Cache( # litellm.cache = litellm.Cache(
type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2" # type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
) # )
litellm.success_callback = ["s3"] # litellm.success_callback = ["s3"]
litellm.s3_callback_params = { # litellm.s3_callback_params = {
"s3_bucket_name": "litellm-logs", # "s3_bucket_name": "litellm-logs",
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY", # "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID", # "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
} # }
litellm.set_verbose = True # litellm.set_verbose = True
print("Testing async s3 logging") # print("Testing async s3 logging")
expected_keys = [] # expected_keys = []
import time # import time
curr_time = str(time.time()) # curr_time = str(time.time())
async def _test(): # async def _test():
return await litellm.acompletion( # return await litellm.acompletion(
model="gpt-3.5-turbo", # model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}], # messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10, # max_tokens=10,
temperature=0.7, # temperature=0.7,
user="ishaan-2", # user="ishaan-2",
) # )
response = asyncio.run(_test()) # response = asyncio.run(_test())
print(f"response: {response}") # print(f"response: {response}")
expected_keys.append(response.id) # expected_keys.append(response.id)
async def _test(): # async def _test():
return await litellm.acompletion( # return await litellm.acompletion(
model="gpt-3.5-turbo", # model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}], # messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10, # max_tokens=10,
temperature=0.7, # temperature=0.7,
user="ishaan-2", # user="ishaan-2",
) # )
response = asyncio.run(_test()) # response = asyncio.run(_test())
expected_keys.append(response.id) # expected_keys.append(response.id)
print(f"response: {response}") # print(f"response: {response}")
time.sleep(5) # wait 5s for logs to land # time.sleep(5) # wait 5s for logs to land
import boto3 # import boto3
s3 = boto3.client("s3") # s3 = boto3.client("s3")
bucket_name = "litellm-logs" # bucket_name = "litellm-logs"
# List objects in the bucket # # List objects in the bucket
response = s3.list_objects(Bucket=bucket_name) # response = s3.list_objects(Bucket=bucket_name)
# Sort the objects based on the LastModified timestamp # # Sort the objects based on the LastModified timestamp
objects = sorted( # objects = sorted(
response["Contents"], key=lambda x: x["LastModified"], reverse=True # response["Contents"], key=lambda x: x["LastModified"], reverse=True
) # )
# Get the keys of the most recent objects # # Get the keys of the most recent objects
most_recent_keys = [obj["Key"] for obj in objects] # most_recent_keys = [obj["Key"] for obj in objects]
print(most_recent_keys) # print(most_recent_keys)
# for each key, get the part before "-" as the key. Do it safely # # for each key, get the part before "-" as the key. Do it safely
cleaned_keys = [] # cleaned_keys = []
for key in most_recent_keys: # for key in most_recent_keys:
split_key = key.split("_") # split_key = key.split("_")
if len(split_key) < 2: # if len(split_key) < 2:
continue # continue
cleaned_keys.append(split_key[1]) # cleaned_keys.append(split_key[1])
print("\n most recent keys", most_recent_keys) # print("\n most recent keys", most_recent_keys)
print("\n cleaned keys", cleaned_keys) # print("\n cleaned keys", cleaned_keys)
print("\n Expected keys: ", expected_keys) # print("\n Expected keys: ", expected_keys)
matches = 0 # matches = 0
for key in expected_keys: # for key in expected_keys:
key += ".json" # key += ".json"
assert key in cleaned_keys # assert key in cleaned_keys
if key in cleaned_keys: # if key in cleaned_keys:
matches += 1 # matches += 1
# remove the match key # # remove the match key
cleaned_keys.remove(key) # cleaned_keys.remove(key)
# this asserts we log, the first request + the 2nd cached request # # this asserts we log, the first request + the 2nd cached request
print("we had two matches ! passed ", matches) # print("we had two matches ! passed ", matches)
assert matches == 2 # assert matches == 2
try: # try:
# cleanup s3 bucket in test # # cleanup s3 bucket in test
for key in most_recent_keys: # for key in most_recent_keys:
s3.delete_object(Bucket=bucket_name, Key=key) # s3.delete_object(Bucket=bucket_name, Key=key)
except: # except:
# don't let cleanup fail a test # # don't let cleanup fail a test
pass # pass
except Exception as e: # except Exception as e:
pytest.fail(f"An exception occurred - {e}") # pytest.fail(f"An exception occurred - {e}")
finally: # finally:
# post, close log file and verify # # post, close log file and verify
# Reset stdout to the original value # # Reset stdout to the original value
print("Passed! Testing async s3 logging") # print("Passed! Testing async s3 logging")
# test_s3_logging() # # test_s3_logging()
def test_s3_logging_async(): # def test_s3_logging_async():
# this tests time added to make s3 logging calls, vs just acompletion calls # # this tests time added to make s3 logging calls, vs just acompletion calls
try: # try:
litellm.set_verbose = True # litellm.set_verbose = True
# Make 5 calls with an empty success_callback # # Make 5 calls with an empty success_callback
litellm.success_callback = [] # litellm.success_callback = []
start_time_empty_callback = asyncio.run(make_async_calls()) # start_time_empty_callback = asyncio.run(make_async_calls())
print("done with no callback test") # print("done with no callback test")
print("starting s3 logging load test") # print("starting s3 logging load test")
# Make 5 calls with success_callback set to "langfuse" # # Make 5 calls with success_callback set to "langfuse"
litellm.success_callback = ["s3"] # litellm.success_callback = ["s3"]
litellm.s3_callback_params = { # litellm.s3_callback_params = {
"s3_bucket_name": "litellm-logs", # "s3_bucket_name": "litellm-logs",
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY", # "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID", # "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
} # }
start_time_s3 = asyncio.run(make_async_calls()) # start_time_s3 = asyncio.run(make_async_calls())
print("done with s3 test") # print("done with s3 test")
# Compare the time for both scenarios # # Compare the time for both scenarios
print(f"Time taken with success_callback='s3': {start_time_s3}") # print(f"Time taken with success_callback='s3': {start_time_s3}")
print(f"Time taken with empty success_callback: {start_time_empty_callback}") # print(f"Time taken with empty success_callback: {start_time_empty_callback}")
# assert the diff is not more than 1 second # # assert the diff is not more than 1 second
assert abs(start_time_s3 - start_time_empty_callback) < 1 # assert abs(start_time_s3 - start_time_empty_callback) < 1
except litellm.Timeout as e: # except litellm.Timeout as e:
pass # pass
except Exception as e: # except Exception as e:
pytest.fail(f"An exception occurred - {e}") # pytest.fail(f"An exception occurred - {e}")
async def make_async_calls(): # async def make_async_calls():
tasks = [] # tasks = []
for _ in range(5): # for _ in range(5):
task = asyncio.create_task( # task = asyncio.create_task(
litellm.acompletion( # litellm.acompletion(
model="azure/chatgpt-v-2", # model="azure/chatgpt-v-2",
messages=[{"role": "user", "content": "This is a test"}], # messages=[{"role": "user", "content": "This is a test"}],
max_tokens=5, # max_tokens=5,
temperature=0.7, # temperature=0.7,
timeout=5, # timeout=5,
user="langfuse_latency_test_user", # user="langfuse_latency_test_user",
mock_response="It's simple to use and easy to get started", # mock_response="It's simple to use and easy to get started",
) # )
) # )
tasks.append(task) # tasks.append(task)
# Measure the start time before running the tasks # # Measure the start time before running the tasks
start_time = asyncio.get_event_loop().time() # start_time = asyncio.get_event_loop().time()
# Wait for all tasks to complete # # Wait for all tasks to complete
responses = await asyncio.gather(*tasks) # responses = await asyncio.gather(*tasks)
# Print the responses when tasks return # # Print the responses when tasks return
for idx, response in enumerate(responses): # for idx, response in enumerate(responses):
print(f"Response from Task {idx + 1}: {response}") # print(f"Response from Task {idx + 1}: {response}")
# Calculate the total time taken # # Calculate the total time taken
total_time = asyncio.get_event_loop().time() - start_time # total_time = asyncio.get_event_loop().time() - start_time
return total_time # return total_time
def test_s3_logging_r2(): # def test_s3_logging_r2():
# all s3 requests need to be in one test function # # all s3 requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel # # since we are modifying stdout, and pytests runs tests in parallel
# on circle ci - we only test litellm.acompletion() # # on circle ci - we only test litellm.acompletion()
try: # try:
# redirect stdout to log_file # # redirect stdout to log_file
# litellm.cache = litellm.Cache( # # litellm.cache = litellm.Cache(
# type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2" # # type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
# ) # # )
litellm.set_verbose = True # litellm.set_verbose = True
from litellm._logging import verbose_logger # from litellm._logging import verbose_logger
import logging # import logging
verbose_logger.setLevel(level=logging.DEBUG) # verbose_logger.setLevel(level=logging.DEBUG)
litellm.success_callback = ["s3"] # litellm.success_callback = ["s3"]
litellm.s3_callback_params = { # litellm.s3_callback_params = {
"s3_bucket_name": "litellm-r2-bucket", # "s3_bucket_name": "litellm-r2-bucket",
"s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY", # "s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID", # "s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
"s3_endpoint_url": "os.environ/R2_S3_URL", # "s3_endpoint_url": "os.environ/R2_S3_URL",
"s3_region_name": "os.environ/R2_S3_REGION_NAME", # "s3_region_name": "os.environ/R2_S3_REGION_NAME",
} # }
print("Testing async s3 logging") # print("Testing async s3 logging")
expected_keys = [] # expected_keys = []
import time # import time
curr_time = str(time.time()) # curr_time = str(time.time())
async def _test(): # async def _test():
return await litellm.acompletion( # return await litellm.acompletion(
model="gpt-3.5-turbo", # model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}], # messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10, # max_tokens=10,
temperature=0.7, # temperature=0.7,
user="ishaan-2", # user="ishaan-2",
) # )
response = asyncio.run(_test()) # response = asyncio.run(_test())
print(f"response: {response}") # print(f"response: {response}")
expected_keys.append(response.id) # expected_keys.append(response.id)
import boto3 # import boto3
s3 = boto3.client( # s3 = boto3.client(
"s3", # "s3",
endpoint_url=os.getenv("R2_S3_URL"), # endpoint_url=os.getenv("R2_S3_URL"),
region_name=os.getenv("R2_S3_REGION_NAME"), # region_name=os.getenv("R2_S3_REGION_NAME"),
aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"), # aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"), # aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
) # )
bucket_name = "litellm-r2-bucket" # bucket_name = "litellm-r2-bucket"
# List objects in the bucket # # List objects in the bucket
response = s3.list_objects(Bucket=bucket_name) # response = s3.list_objects(Bucket=bucket_name)
except Exception as e: # except Exception as e:
pytest.fail(f"An exception occurred - {e}") # pytest.fail(f"An exception occurred - {e}")
finally: # finally:
# post, close log file and verify # # post, close log file and verify
# Reset stdout to the original value # # Reset stdout to the original value
print("Passed! Testing async s3 logging") # print("Passed! Testing async s3 logging")
@ -110,6 +110,7 @@ def test_vertex_ai():
"code-bison@001", "code-bison@001",
"text-bison@001", "text-bison@001",
"gemini-1.5-pro", "gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision", "gemini-1.5-pro-vision",
]: ]:
# our account does not have access to this model # our account does not have access to this model
@ -129,6 +130,8 @@ def test_vertex_ai():
f"response.choices[0].finish_reason: {response.choices[0].finish_reason}" f"response.choices[0].finish_reason: {response.choices[0].finish_reason}"
) )
assert response.choices[0].finish_reason in litellm._openai_finish_reasons assert response.choices[0].finish_reason in litellm._openai_finish_reasons
except litellm.RateLimitError as e:
pass
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@ -160,6 +163,7 @@ def test_vertex_ai_stream():
"code-bison@001", "code-bison@001",
"text-bison@001", "text-bison@001",
"gemini-1.5-pro", "gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision", "gemini-1.5-pro-vision",
]: ]:
# our account does not have access to this model # our account does not have access to this model
@ -181,6 +185,8 @@ def test_vertex_ai_stream():
assert type(content) == str assert type(content) == str
# pass # pass
assert len(completed_str) > 4 assert len(completed_str) > 4
except litellm.RateLimitError as e:
pass
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@ -211,6 +217,7 @@ async def test_async_vertexai_response():
"code-bison@001", "code-bison@001",
"text-bison@001", "text-bison@001",
"gemini-1.5-pro", "gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision", "gemini-1.5-pro-vision",
]: ]:
# our account does not have access to this model # our account does not have access to this model
@ -255,6 +262,7 @@ async def test_async_vertexai_streaming_response():
"code-bison@001", "code-bison@001",
"text-bison@001", "text-bison@001",
"gemini-1.5-pro", "gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision", "gemini-1.5-pro-vision",
]: ]:
# our account does not have access to this model # our account does not have access to this model
@ -193,16 +193,26 @@ async def test_hf_completion_tgi():
# Add any assertions here to check the response # Add any assertions here to check the response
print(response) print(response)
except litellm.APIError as e: except litellm.APIError as e:
print("got an api error")
pass pass
except litellm.Timeout as e: except litellm.Timeout as e:
print("got a timeout error")
pass
except litellm.RateLimitError as e:
# this will catch the model is overloaded error
print("got a rate limit error")
pass pass
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") if "Model is overloaded" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
# test_get_cloudflare_response_streaming() # test_get_cloudflare_response_streaming()
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_completion_sagemaker(): async def test_completion_sagemaker():
# litellm.set_verbose=True # litellm.set_verbose=True
@ -1,257 +1,259 @@
import sys, os # @pytest.mark.skip(reason="AWS Suspended Account")
import traceback # import sys, os
from dotenv import load_dotenv # import traceback
# from dotenv import load_dotenv
load_dotenv() # load_dotenv()
import os, io # import os, io
sys.path.insert( # sys.path.insert(
0, os.path.abspath("../..") # 0, os.path.abspath("../..")
) # Adds the parent directory to the system path # ) # Adds the parent directory to the system path
import pytest # import pytest
import litellm # import litellm
from litellm import embedding, completion, completion_cost, Timeout # from litellm import embedding, completion, completion_cost, Timeout
from litellm import RateLimitError # from litellm import RateLimitError
# litellm.num_retries = 3 # # litellm.num_retries = 3
litellm.cache = None # litellm.cache = None
litellm.success_callback = [] # litellm.success_callback = []
user_message = "Write a short poem about the sky" # user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}] # messages = [{"content": user_message, "role": "user"}]
@pytest.fixture(autouse=True) # @pytest.fixture(autouse=True)
def reset_callbacks(): # def reset_callbacks():
print("\npytest fixture - resetting callbacks") # print("\npytest fixture - resetting callbacks")
litellm.success_callback = [] # litellm.success_callback = []
litellm._async_success_callback = [] # litellm._async_success_callback = []
litellm.failure_callback = [] # litellm.failure_callback = []
litellm.callbacks = [] # litellm.callbacks = []
def test_completion_bedrock_claude_completion_auth(): # def test_completion_bedrock_claude_completion_auth():
print("calling bedrock claude completion params auth") # print("calling bedrock claude completion params auth")
import os # import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"] # aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None) # os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None) # os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None) # os.environ.pop("AWS_REGION_NAME", None)
try: # try:
response = completion( # response = completion(
model="bedrock/anthropic.claude-instant-v1", # model="bedrock/anthropic.claude-instant-v1",
messages=messages, # messages=messages,
max_tokens=10, # max_tokens=10,
temperature=0.1, # temperature=0.1,
aws_access_key_id=aws_access_key_id, # aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key, # aws_secret_access_key=aws_secret_access_key,
aws_region_name=aws_region_name, # aws_region_name=aws_region_name,
) # )
# Add any assertions here to check the response # # Add any assertions here to check the response
print(response) # print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id # os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key # os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name # os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError: # except RateLimitError:
pass # pass
except Exception as e: # except Exception as e:
pytest.fail(f"Error occurred: {e}") # pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_completion_auth() # # test_completion_bedrock_claude_completion_auth()
def test_completion_bedrock_claude_2_1_completion_auth(): # def test_completion_bedrock_claude_2_1_completion_auth():
print("calling bedrock claude 2.1 completion params auth") # print("calling bedrock claude 2.1 completion params auth")
import os # import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"] # aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None) # os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None) # os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None) # os.environ.pop("AWS_REGION_NAME", None)
try: # try:
response = completion( # response = completion(
model="bedrock/anthropic.claude-v2:1", # model="bedrock/anthropic.claude-v2:1",
messages=messages, # messages=messages,
max_tokens=10, # max_tokens=10,
temperature=0.1, # temperature=0.1,
aws_access_key_id=aws_access_key_id, # aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key, # aws_secret_access_key=aws_secret_access_key,
aws_region_name=aws_region_name, # aws_region_name=aws_region_name,
) # )
# Add any assertions here to check the response # # Add any assertions here to check the response
print(response) # print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id # os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key # os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name # os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError: # except RateLimitError:
pass # pass
except Exception as e: # except Exception as e:
pytest.fail(f"Error occurred: {e}") # pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_2_1_completion_auth() # # test_completion_bedrock_claude_2_1_completion_auth()
def test_completion_bedrock_claude_external_client_auth(): # def test_completion_bedrock_claude_external_client_auth():
print("\ncalling bedrock claude external client auth") # print("\ncalling bedrock claude external client auth")
import os # import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"] # aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None) # os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None) # os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None) # os.environ.pop("AWS_REGION_NAME", None)
try: # try:
import boto3 # import boto3
litellm.set_verbose = True # litellm.set_verbose = True
bedrock = boto3.client( # bedrock = boto3.client(
service_name="bedrock-runtime", # service_name="bedrock-runtime",
region_name=aws_region_name, # region_name=aws_region_name,
aws_access_key_id=aws_access_key_id, # aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key, # aws_secret_access_key=aws_secret_access_key,
endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com", # endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
) # )
response = completion( # response = completion(
model="bedrock/anthropic.claude-instant-v1", # model="bedrock/anthropic.claude-instant-v1",
messages=messages, # messages=messages,
max_tokens=10, # max_tokens=10,
temperature=0.1, # temperature=0.1,
aws_bedrock_client=bedrock, # aws_bedrock_client=bedrock,
) # )
# Add any assertions here to check the response # # Add any assertions here to check the response
print(response) # print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id # os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key # os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name # os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError: # except RateLimitError:
pass # pass
except Exception as e: # except Exception as e:
pytest.fail(f"Error occurred: {e}") # pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_external_client_auth() # # test_completion_bedrock_claude_external_client_auth()
def test_completion_bedrock_claude_sts_client_auth(): # @pytest.mark.skip(reason="Expired token, need to renew")
print("\ncalling bedrock claude external client auth") # def test_completion_bedrock_claude_sts_client_auth():
import os # print("\ncalling bedrock claude external client auth")
# import os
aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"] # aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"] # aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"] # aws_region_name = os.environ["AWS_REGION_NAME"]
aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"] # aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
try: # try:
import boto3 # import boto3
litellm.set_verbose = True # litellm.set_verbose = True
response = completion( # response = completion(
model="bedrock/anthropic.claude-instant-v1", # model="bedrock/anthropic.claude-instant-v1",
messages=messages, # messages=messages,
max_tokens=10, # max_tokens=10,
temperature=0.1, # temperature=0.1,
aws_region_name=aws_region_name, # aws_region_name=aws_region_name,
aws_access_key_id=aws_access_key_id, # aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key, # aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name, # aws_role_name=aws_role_name,
aws_session_name="my-test-session", # aws_session_name="my-test-session",
) # )
response = embedding( # response = embedding(
model="cohere.embed-multilingual-v3", # model="cohere.embed-multilingual-v3",
input=["hello world"], # input=["hello world"],
aws_region_name="us-east-1", # aws_region_name="us-east-1",
aws_access_key_id=aws_access_key_id, # aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key, # aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name, # aws_role_name=aws_role_name,
aws_session_name="my-test-session", # aws_session_name="my-test-session",
) # )
response = completion( # response = completion(
model="gpt-3.5-turbo", # model="gpt-3.5-turbo",
messages=messages, # messages=messages,
aws_region_name="us-east-1", # aws_region_name="us-east-1",
aws_access_key_id=aws_access_key_id, # aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key, # aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name, # aws_role_name=aws_role_name,
aws_session_name="my-test-session", # aws_session_name="my-test-session",
) # )
# Add any assertions here to check the response # # Add any assertions here to check the response
print(response) # print(response)
except RateLimitError: # except RateLimitError:
pass # pass
except Exception as e: # except Exception as e:
pytest.fail(f"Error occurred: {e}") # pytest.fail(f"Error occurred: {e}")
test_completion_bedrock_claude_sts_client_auth() # # test_completion_bedrock_claude_sts_client_auth()
def test_provisioned_throughput(): # def test_provisioned_throughput():
try: # try:
litellm.set_verbose = True # litellm.set_verbose = True
import botocore, json, io # import botocore, json, io
import botocore.session # import botocore.session
from botocore.stub import Stubber # from botocore.stub import Stubber
bedrock_client = botocore.session.get_session().create_client( # bedrock_client = botocore.session.get_session().create_client(
"bedrock-runtime", region_name="us-east-1" # "bedrock-runtime", region_name="us-east-1"
) # )
expected_params = { # expected_params = {
"accept": "application/json", # "accept": "application/json",
"body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", ' # "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
'"max_tokens_to_sample": 256}', # '"max_tokens_to_sample": 256}',
"contentType": "application/json", # "contentType": "application/json",
"modelId": "provisioned-model-arn", # "modelId": "provisioned-model-arn",
} # }
response_from_bedrock = { # response_from_bedrock = {
"body": io.StringIO( # "body": io.StringIO(
json.dumps( # json.dumps(
{ # {
"completion": " Here is a short poem about the sky:", # "completion": " Here is a short poem about the sky:",
"stop_reason": "max_tokens", # "stop_reason": "max_tokens",
"stop": None, # "stop": None,
} # }
) # )
), # ),
"contentType": "contentType", # "contentType": "contentType",
"ResponseMetadata": {"HTTPStatusCode": 200}, # "ResponseMetadata": {"HTTPStatusCode": 200},
} # }
with Stubber(bedrock_client) as stubber: # with Stubber(bedrock_client) as stubber:
stubber.add_response( # stubber.add_response(
"invoke_model", # "invoke_model",
service_response=response_from_bedrock, # service_response=response_from_bedrock,
expected_params=expected_params, # expected_params=expected_params,
) # )
response = litellm.completion( # response = litellm.completion(
model="bedrock/anthropic.claude-instant-v1", # model="bedrock/anthropic.claude-instant-v1",
model_id="provisioned-model-arn", # model_id="provisioned-model-arn",
messages=[{"content": "Hello, how are you?", "role": "user"}], # messages=[{"content": "Hello, how are you?", "role": "user"}],
aws_bedrock_client=bedrock_client, # aws_bedrock_client=bedrock_client,
) # )
print("response stubbed", response) # print("response stubbed", response)
except Exception as e: # except Exception as e:
pytest.fail(f"Error occurred: {e}") # pytest.fail(f"Error occurred: {e}")
# test_provisioned_throughput() # # test_provisioned_throughput()
@ -546,6 +546,7 @@ def test_redis_cache_acompletion_stream():
# test_redis_cache_acompletion_stream() # test_redis_cache_acompletion_stream()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_redis_cache_acompletion_stream_bedrock(): def test_redis_cache_acompletion_stream_bedrock():
import asyncio import asyncio
@ -571,7 +572,7 @@ def test_redis_cache_acompletion_stream_bedrock():
async def call1(): async def call1():
nonlocal response_1_content nonlocal response_1_content
response1 = await litellm.acompletion( response1 = await litellm.acompletion(
model="bedrock/anthropic.claude-v1", model="bedrock/anthropic.claude-v2",
messages=messages, messages=messages,
max_tokens=40, max_tokens=40,
temperature=1, temperature=1,
@ -589,7 +590,7 @@ def test_redis_cache_acompletion_stream_bedrock():
async def call2(): async def call2():
nonlocal response_2_content nonlocal response_2_content
response2 = await litellm.acompletion( response2 = await litellm.acompletion(
model="bedrock/anthropic.claude-v1", model="bedrock/anthropic.claude-v2",
messages=messages, messages=messages,
max_tokens=40, max_tokens=40,
temperature=1, temperature=1,
@ -615,6 +616,7 @@ def test_redis_cache_acompletion_stream_bedrock():
raise e raise e
@pytest.mark.skip(reason="AWS Suspended Account")
def test_s3_cache_acompletion_stream_azure(): def test_s3_cache_acompletion_stream_azure():
import asyncio import asyncio
@ -697,6 +699,7 @@ def test_s3_cache_acompletion_stream_azure():
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.skip(reason="AWS Suspended Account")
async def test_s3_cache_acompletion_azure(): async def test_s3_cache_acompletion_azure():
import asyncio import asyncio
import logging import logging
@ -1404,6 +1404,7 @@ def test_customprompt_together_ai():
# test_customprompt_together_ai() # test_customprompt_together_ai()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker(): def test_completion_sagemaker():
try: try:
litellm.set_verbose = True litellm.set_verbose = True
@ -1429,6 +1430,7 @@ def test_completion_sagemaker():
# test_completion_sagemaker() # test_completion_sagemaker()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker_stream(): def test_completion_sagemaker_stream():
try: try:
litellm.set_verbose = False litellm.set_verbose = False
@ -1459,6 +1461,7 @@ def test_completion_sagemaker_stream():
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_chat_sagemaker(): def test_completion_chat_sagemaker():
try: try:
messages = [{"role": "user", "content": "Hey, how's it going?"}] messages = [{"role": "user", "content": "Hey, how's it going?"}]
@ -1483,6 +1486,7 @@ def test_completion_chat_sagemaker():
# test_completion_chat_sagemaker() # test_completion_chat_sagemaker()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_chat_sagemaker_mistral(): def test_completion_chat_sagemaker_mistral():
try: try:
messages = [{"role": "user", "content": "Hey, how's it going?"}] messages = [{"role": "user", "content": "Hey, how's it going?"}]
@ -1501,6 +1505,7 @@ def test_completion_chat_sagemaker_mistral():
# test_completion_chat_sagemaker_mistral() # test_completion_chat_sagemaker_mistral()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan_null_response(): def test_completion_bedrock_titan_null_response():
try: try:
response = completion( response = completion(
@ -1526,6 +1531,7 @@ def test_completion_bedrock_titan_null_response():
pytest.fail(f"An error occurred - {str(e)}") pytest.fail(f"An error occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan(): def test_completion_bedrock_titan():
try: try:
response = completion( response = completion(
@ -1547,6 +1553,7 @@ def test_completion_bedrock_titan():
# test_completion_bedrock_titan() # test_completion_bedrock_titan()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_claude(): def test_completion_bedrock_claude():
print("calling claude") print("calling claude")
try: try:
@ -1568,6 +1575,7 @@ def test_completion_bedrock_claude():
# test_completion_bedrock_claude() # test_completion_bedrock_claude()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_cohere(): def test_completion_bedrock_cohere():
print("calling bedrock cohere") print("calling bedrock cohere")
litellm.set_verbose = True litellm.set_verbose = True
@ -1954,12 +1962,15 @@ def test_completion_gemini():
messages = [{"role": "user", "content": "Hey, how's it going?"}] messages = [{"role": "user", "content": "Hey, how's it going?"}]
try: try:
response = completion(model=model_name, messages=messages) response = completion(model=model_name, messages=messages)
# Add any assertions here to check the response # Add any assertions,here to check the response
print(response) print(response)
except litellm.APIError as e: except litellm.APIError as e:
pass pass
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") if "InternalServerError" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
# test_completion_gemini() # test_completion_gemini()
@ -1974,8 +1985,13 @@ async def test_acompletion_gemini():
response = await litellm.acompletion(model=model_name, messages=messages) response = await litellm.acompletion(model=model_name, messages=messages)
# Add any assertions here to check the response # Add any assertions here to check the response
print(f"response: {response}") print(f"response: {response}")
except litellm.APIError as e:
pass
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") if "InternalServerError" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
# Palm tests # Palm tests
@ -171,6 +171,7 @@ def test_cost_openai_image_gen():
assert cost == 0.019922944 assert cost == 0.019922944
@pytest.mark.skip(reason="AWS Suspended Account")
def test_cost_bedrock_pricing(): def test_cost_bedrock_pricing():
""" """
- get pricing specific to region for a model - get pricing specific to region for a model
@ -226,6 +227,7 @@ def test_cost_bedrock_pricing():
assert cost == predicted_cost assert cost == predicted_cost
@pytest.mark.skip(reason="AWS disabled our access")
def test_cost_bedrock_pricing_actual_calls(): def test_cost_bedrock_pricing_actual_calls():
litellm.set_verbose = True litellm.set_verbose = True
model = "anthropic.claude-instant-v1" model = "anthropic.claude-instant-v1"
@ -80,16 +80,6 @@ model_list:
description: this is a test openai model description: this is a test openai model
id: 9b1ef341-322c-410a-8992-903987fef439 id: 9b1ef341-322c-410a-8992-903987fef439
model_name: test_openai_models model_name: test_openai_models
- litellm_params:
model: bedrock/amazon.titan-embed-text-v1
model_info:
mode: embedding
model_name: amazon-embeddings
- litellm_params:
model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
model_info:
mode: embedding
model_name: GPT-J 6B - Sagemaker Text Embedding (Internal)
- litellm_params: - litellm_params:
model: dall-e-3 model: dall-e-3
model_info: model_info:
@ -478,17 +478,18 @@ async def test_async_chat_azure_stream():
## Test Bedrock + sync ## Test Bedrock + sync
@pytest.mark.skip(reason="AWS Suspended Account")
def test_chat_bedrock_stream(): def test_chat_bedrock_stream():
try: try:
customHandler = CompletionCustomHandler() customHandler = CompletionCustomHandler()
litellm.callbacks = [customHandler] litellm.callbacks = [customHandler]
response = litellm.completion( response = litellm.completion(
model="bedrock/anthropic.claude-v1", model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}], messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
) )
# test streaming # test streaming
response = litellm.completion( response = litellm.completion(
model="bedrock/anthropic.claude-v1", model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}], messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
stream=True, stream=True,
) )
@ -497,7 +498,7 @@ def test_chat_bedrock_stream():
# test failure callback # test failure callback
try: try:
response = litellm.completion( response = litellm.completion(
model="bedrock/anthropic.claude-v1", model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}], messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
aws_region_name="my-bad-region", aws_region_name="my-bad-region",
stream=True, stream=True,
@ -518,18 +519,19 @@ def test_chat_bedrock_stream():
## Test Bedrock + Async ## Test Bedrock + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_async_chat_bedrock_stream(): async def test_async_chat_bedrock_stream():
try: try:
customHandler = CompletionCustomHandler() customHandler = CompletionCustomHandler()
litellm.callbacks = [customHandler] litellm.callbacks = [customHandler]
response = await litellm.acompletion( response = await litellm.acompletion(
model="bedrock/anthropic.claude-v1", model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}], messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
) )
# test streaming # test streaming
response = await litellm.acompletion( response = await litellm.acompletion(
model="bedrock/anthropic.claude-v1", model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}], messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
stream=True, stream=True,
) )
@ -540,7 +542,7 @@ async def test_async_chat_bedrock_stream():
## test failure callback ## test failure callback
try: try:
response = await litellm.acompletion( response = await litellm.acompletion(
model="bedrock/anthropic.claude-v1", model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}], messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
aws_region_name="my-bad-key", aws_region_name="my-bad-key",
stream=True, stream=True,
@ -561,6 +563,7 @@ async def test_async_chat_bedrock_stream():
## Test Sagemaker + Async ## Test Sagemaker + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_async_chat_sagemaker_stream(): async def test_async_chat_sagemaker_stream():
try: try:
@ -793,6 +796,7 @@ async def test_async_embedding_azure():
## Test Bedrock + Async ## Test Bedrock + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_async_embedding_bedrock(): async def test_async_embedding_bedrock():
try: try:
@ -388,6 +388,7 @@ async def test_async_custom_handler_embedding_optional_param():
# asyncio.run(test_async_custom_handler_embedding_optional_param()) # asyncio.run(test_async_custom_handler_embedding_optional_param())
@pytest.mark.skip(reason="AWS Account suspended. Pending their approval")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_async_custom_handler_embedding_optional_param_bedrock(): async def test_async_custom_handler_embedding_optional_param_bedrock():
""" """

View file

@ -67,6 +67,7 @@ def verify_log_file(log_file_path):
assert success_count == 3 # Expect 3 success logs from dynamoDB assert success_count == 3 # Expect 3 success logs from dynamoDB
@pytest.mark.skip(reason="AWS Suspended Account")
def test_dynamo_logging(): def test_dynamo_logging():
# all dynamodb requests need to be in one test function # all dynamodb requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel # since we are modifying stdout, and pytests runs tests in parallel

View file

@ -256,6 +256,7 @@ async def test_vertexai_aembedding():
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding_titan(): def test_bedrock_embedding_titan():
try: try:
# this tests if we support str input for bedrock embedding # this tests if we support str input for bedrock embedding
@ -301,6 +302,7 @@ def test_bedrock_embedding_titan():
# test_bedrock_embedding_titan() # test_bedrock_embedding_titan()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding_cohere(): def test_bedrock_embedding_cohere():
try: try:
litellm.set_verbose = False litellm.set_verbose = False
@ -422,6 +424,7 @@ def test_aembedding_azure():
# test_aembedding_azure() # test_aembedding_azure()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_sagemaker_embeddings(): def test_sagemaker_embeddings():
try: try:
response = litellm.embedding( response = litellm.embedding(
@ -438,6 +441,7 @@ def test_sagemaker_embeddings():
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_sagemaker_aembeddings(): async def test_sagemaker_aembeddings():
try: try:

View file

@ -42,6 +42,7 @@ exception_models = [
# Test 1: Context Window Errors # Test 1: Context Window Errors
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.parametrize("model", exception_models) @pytest.mark.parametrize("model", exception_models)
def test_context_window(model): def test_context_window(model):
print("Testing context window error") print("Testing context window error")
@ -120,9 +121,9 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
os.environ["AI21_API_KEY"] = "bad-key" os.environ["AI21_API_KEY"] = "bad-key"
elif "togethercomputer" in model: elif "togethercomputer" in model:
temporary_key = os.environ["TOGETHERAI_API_KEY"] temporary_key = os.environ["TOGETHERAI_API_KEY"]
os.environ[ os.environ["TOGETHERAI_API_KEY"] = (
"TOGETHERAI_API_KEY" "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
] = "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a" )
elif model in litellm.openrouter_models: elif model in litellm.openrouter_models:
temporary_key = os.environ["OPENROUTER_API_KEY"] temporary_key = os.environ["OPENROUTER_API_KEY"]
os.environ["OPENROUTER_API_KEY"] = "bad-key" os.environ["OPENROUTER_API_KEY"] = "bad-key"

View file

@ -87,6 +87,7 @@ async def test_azure_img_gen_health_check():
# asyncio.run(test_azure_img_gen_health_check()) # asyncio.run(test_azure_img_gen_health_check())
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_sagemaker_embedding_health_check(): async def test_sagemaker_embedding_health_check():
response = await litellm.ahealth_check( response = await litellm.ahealth_check(

View file

@ -121,6 +121,7 @@ async def test_async_image_generation_azure():
pytest.fail(f"An exception occurred - {str(e)}") pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_image_generation_bedrock(): def test_image_generation_bedrock():
try: try:
litellm.set_verbose = True litellm.set_verbose = True
@ -141,6 +142,7 @@ def test_image_generation_bedrock():
pytest.fail(f"An exception occurred - {str(e)}") pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_aimage_generation_bedrock_with_optional_params(): async def test_aimage_generation_bedrock_with_optional_params():
try: try:

File diff suppressed because it is too large

View file

@ -80,6 +80,14 @@ request_data = {
@pytest.fixture @pytest.fixture
def prisma_client(): def prisma_client():
from litellm.proxy.proxy_cli import append_query_params
### add connection pool + pool timeout args
params = {"connection_limit": 100, "pool_timeout": 60}
database_url = os.getenv("DATABASE_URL")
modified_url = append_query_params(database_url, params)
os.environ["DATABASE_URL"] = modified_url
# Assuming DBClient is a class that needs to be instantiated # Assuming DBClient is a class that needs to be instantiated
prisma_client = PrismaClient( prisma_client = PrismaClient(
database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
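For reference, the fixture above relies on `append_query_params` from `litellm.proxy.proxy_cli` to splice the pool settings into the database URL. A minimal sketch of what such a helper does (an illustrative re-implementation, not the actual litellm code; the example URL is made up):

```python
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl

def append_query_params(url: str, params: dict) -> str:
    # Merge the new params into whatever query string the URL already has.
    parsed = urlparse(url)
    query = dict(parse_qsl(parsed.query))
    query.update({k: str(v) for k, v in params.items()})
    return urlunparse(parsed._replace(query=urlencode(query)))

# e.g. "postgresql://user:pass@host:5432/db"
#  ->  "postgresql://user:pass@host:5432/db?connection_limit=100&pool_timeout=60"
print(append_query_params("postgresql://user:pass@host:5432/db",
                          {"connection_limit": 100, "pool_timeout": 60}))
```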
@ -1633,3 +1641,99 @@ async def test_key_with_no_permissions(prisma_client):
except Exception as e: except Exception as e:
print("Got Exception", e) print("Got Exception", e)
print(e.message) print(e.message)
async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
from litellm import ModelResponse, Choices, Message, Usage
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)
import uuid
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
resp = ModelResponse(
id=request_id,
choices=[
Choices(
finish_reason=None,
index=0,
message=Message(
content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
role="assistant",
),
)
],
model="gpt-35-turbo", # azure always has model written like this
usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
)
await track_cost_callback(
kwargs={
"call_type": "acompletion",
"model": "sagemaker-chatgpt-v-2",
"stream": True,
"complete_streaming_response": resp,
"litellm_params": {
"metadata": {
"user_api_key": hash_token(generated_key),
"user_api_key_user_id": user_id,
}
},
"response_cost": 0.00005,
},
completion_response=resp,
start_time=datetime.now(),
end_time=datetime.now(),
)
@pytest.mark.skip(reason="High traffic load test for spend tracking")
@pytest.mark.asyncio
async def test_proxy_load_test_db(prisma_client):
"""
Run 1500 req./s against track_cost_callback function
"""
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
from litellm._logging import verbose_proxy_logger
import logging, time
litellm.set_verbose = True
verbose_proxy_logger.setLevel(logging.DEBUG)
try:
start_time = time.time()
await litellm.proxy.proxy_server.prisma_client.connect()
request = GenerateKeyRequest(max_budget=0.00001)
key = await generate_key_fn(request)
print(key)
generated_key = key.key
user_id = key.user_id
bearer_token = "Bearer " + generated_key
request = Request(scope={"type": "http"})
request._url = URL(url="/chat/completions")
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print("result from user auth with new key", result)
# update spend using track_cost callback, make 2nd request, it should fail
n = 5000
tasks = [
track_cost_callback_helper_fn(generated_key=generated_key, user_id=user_id)
for _ in range(n)
]
completions = await asyncio.gather(*tasks)
await asyncio.sleep(120)
try:
# call spend logs
spend_logs = await view_spend_logs(api_key=generated_key)
print(f"len responses: {len(spend_logs)}")
assert len(spend_logs) == n
print(n, time.time() - start_time, len(spend_logs))
except:
print(n, time.time() - start_time, 0)
raise Exception(f"it worked! key={key.key}")
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")

View file

@ -12,6 +12,7 @@ import litellm
from litellm import completion from litellm import completion
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker(): def test_completion_sagemaker():
litellm.set_verbose = True litellm.set_verbose = True
litellm.drop_params = True litellm.drop_params = True

View file

@ -473,6 +473,7 @@ def aleph_alpha_test_completion():
# Sagemaker # Sagemaker
@pytest.mark.skip(reason="AWS Suspended Account")
def sagemaker_test_completion(): def sagemaker_test_completion():
litellm.SagemakerConfig(max_new_tokens=10) litellm.SagemakerConfig(max_new_tokens=10)
# litellm.set_verbose=True # litellm.set_verbose=True
@ -514,6 +515,7 @@ def sagemaker_test_completion():
# Bedrock # Bedrock
@pytest.mark.skip(reason="AWS Suspended Account")
def bedrock_test_completion(): def bedrock_test_completion():
litellm.AmazonCohereConfig(max_tokens=10) litellm.AmazonCohereConfig(max_tokens=10)
# litellm.set_verbose=True # litellm.set_verbose=True

View file

@ -125,6 +125,7 @@ def test_embedding(client_no_auth):
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}") pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding(client_no_auth): def test_bedrock_embedding(client_no_auth):
global headers global headers
from litellm.proxy.proxy_server import user_custom_auth from litellm.proxy.proxy_server import user_custom_auth
@ -145,6 +146,7 @@ def test_bedrock_embedding(client_no_auth):
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}") pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_sagemaker_embedding(client_no_auth): def test_sagemaker_embedding(client_no_auth):
global headers global headers
from litellm.proxy.proxy_server import user_custom_auth from litellm.proxy.proxy_server import user_custom_auth

View file

@ -61,6 +61,7 @@ def generate_random_word(length=4):
return "".join(random.choice(letters) for _ in range(length)) return "".join(random.choice(letters) for _ in range(length))
@pytest.mark.skip(reason="AWS Suspended Account")
def test_chat_completion(client_no_auth): def test_chat_completion(client_no_auth):
global headers global headers
try: try:

View file

@ -166,14 +166,6 @@ def test_call_one_endpoint():
"tpm": 240000, "tpm": 240000,
"rpm": 1800, "rpm": 1800,
}, },
{
"model_name": "claude-v1",
"litellm_params": {
"model": "bedrock/anthropic.claude-instant-v1",
},
"tpm": 100000,
"rpm": 10000,
},
{ {
"model_name": "text-embedding-ada-002", "model_name": "text-embedding-ada-002",
"litellm_params": { "litellm_params": {
@ -202,15 +194,6 @@ def test_call_one_endpoint():
) )
print("\n response", response) print("\n response", response)
async def call_bedrock_claude():
response = await router.acompletion(
model="bedrock/anthropic.claude-instant-v1",
messages=[{"role": "user", "content": "hello this request will pass"}],
specific_deployment=True,
)
print("\n response", response)
async def call_azure_embedding(): async def call_azure_embedding():
response = await router.aembedding( response = await router.aembedding(
model="azure/azure-embedding-model", model="azure/azure-embedding-model",
@ -221,7 +204,6 @@ def test_call_one_endpoint():
print("\n response", response) print("\n response", response)
asyncio.run(call_azure_completion()) asyncio.run(call_azure_completion())
asyncio.run(call_bedrock_claude())
asyncio.run(call_azure_embedding()) asyncio.run(call_azure_embedding())
os.environ["AZURE_API_BASE"] = old_api_base os.environ["AZURE_API_BASE"] = old_api_base
@ -593,6 +575,7 @@ def test_azure_embedding_on_router():
# test_azure_embedding_on_router() # test_azure_embedding_on_router()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_on_router(): def test_bedrock_on_router():
litellm.set_verbose = True litellm.set_verbose = True
print("\n Testing bedrock on router\n") print("\n Testing bedrock on router\n")

View file

@ -87,6 +87,7 @@ def test_router_timeouts():
print("********** TOKENS USED SO FAR = ", total_tokens_used) print("********** TOKENS USED SO FAR = ", total_tokens_used)
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_router_timeouts_bedrock(): async def test_router_timeouts_bedrock():
import openai import openai

View file

@ -764,6 +764,7 @@ def test_completion_replicate_stream_bad_key():
# test_completion_replicate_stream_bad_key() # test_completion_replicate_stream_bad_key()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_claude_stream(): def test_completion_bedrock_claude_stream():
try: try:
litellm.set_verbose = False litellm.set_verbose = False
@ -810,6 +811,7 @@ def test_completion_bedrock_claude_stream():
# test_completion_bedrock_claude_stream() # test_completion_bedrock_claude_stream()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_ai21_stream(): def test_completion_bedrock_ai21_stream():
try: try:
litellm.set_verbose = False litellm.set_verbose = False
@ -911,6 +913,7 @@ def test_sagemaker_weird_response():
# test_sagemaker_weird_response() # test_sagemaker_weird_response()
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_sagemaker_streaming_async(): async def test_sagemaker_streaming_async():
try: try:
@ -949,6 +952,7 @@ async def test_sagemaker_streaming_async():
# asyncio.run(test_sagemaker_streaming_async()) # asyncio.run(test_sagemaker_streaming_async())
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker_stream(): def test_completion_sagemaker_stream():
try: try:
response = completion( response = completion(
@ -1075,8 +1079,6 @@ async def test_hf_completion_tgi_stream():
if finished: if finished:
break break
idx += 1 idx += 1
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}") print(f"completion_response: {complete_response}")
except litellm.ServiceUnavailableError as e: except litellm.ServiceUnavailableError as e:
pass pass

View file

@ -317,3 +317,24 @@ def test_token_counter():
# test_token_counter() # test_token_counter()
def test_supports_function_calling():
try:
assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
assert (
litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
)
assert (
litellm.supports_function_calling(model="anthropic.claude-instant-v1")
== False
)
assert litellm.supports_function_calling(model="palm/chat-bison") == False
assert litellm.supports_function_calling(model="ollama/llama2") == False
assert (
litellm.supports_function_calling(model="anthropic.claude-instant-v1")
== False
)
assert litellm.supports_function_calling(model="claude-2") == False
except Exception as e:
pytest.fail(f"Error occurred: {e}")

View file

@ -205,18 +205,18 @@ def map_finish_reason(
class FunctionCall(OpenAIObject): class FunctionCall(OpenAIObject):
arguments: str arguments: str
name: str name: Optional[str] = None
class Function(OpenAIObject): class Function(OpenAIObject):
arguments: str arguments: str
name: str name: Optional[str] = None
class ChatCompletionDeltaToolCall(OpenAIObject): class ChatCompletionDeltaToolCall(OpenAIObject):
id: str id: Optional[str] = None
function: Function function: Function
type: str type: Optional[str] = None
index: int index: int
@ -275,13 +275,19 @@ class Delta(OpenAIObject):
super(Delta, self).__init__(**params) super(Delta, self).__init__(**params)
self.content = content self.content = content
self.role = role self.role = role
self.function_call = function_call if function_call is not None and isinstance(function_call, dict):
if tool_calls is not None and isinstance(tool_calls, dict): self.function_call = FunctionCall(**function_call)
else:
self.function_call = function_call
if tool_calls is not None and isinstance(tool_calls, list):
self.tool_calls = [] self.tool_calls = []
for tool_call in tool_calls: for tool_call in tool_calls:
if tool_call.get("index", None) is None: if isinstance(tool_call, dict):
tool_call["index"] = 0 if tool_call.get("index", None) is None:
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) tool_call["index"] = 0
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
elif isinstance(tool_call, ChatCompletionDeltaToolCall):
self.tool_calls.append(tool_call)
else: else:
self.tool_calls = tool_calls self.tool_calls = tool_calls
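The net effect of the `Delta` changes above is that raw dicts coming off a stream are coerced into typed objects. A minimal sketch of the new behaviour, assuming `Delta` is importable from `litellm.utils` as in this diff (the tool-call payload is a made-up example):

```python
from litellm.utils import Delta

delta = Delta(
    role="assistant",
    function_call={"name": "get_weather", "arguments": '{"city": "Paris"}'},
    tool_calls=[
        {
            # "index" may be omitted; the new code defaults it to 0
            "id": "call_1",
            "type": "function",
            "function": {"name": "get_weather", "arguments": "{}"},
        }
    ],
)
# delta.function_call is now a FunctionCall object and delta.tool_calls a list
# of ChatCompletionDeltaToolCall objects, instead of plain dicts.
print(type(delta.function_call).__name__, type(delta.tool_calls[0]).__name__)
```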
@ -1636,7 +1642,7 @@ class Logging:
verbose_logger.debug( verbose_logger.debug(
"Async success callbacks: Got a complete streaming response" "Async success callbacks: Got a complete streaming response"
) )
self.model_call_details["complete_streaming_response"] = ( self.model_call_details["async_complete_streaming_response"] = (
complete_streaming_response complete_streaming_response
) )
try: try:
@ -1684,28 +1690,31 @@ class Logging:
print_verbose("async success_callback: reaches cache for logging!") print_verbose("async success_callback: reaches cache for logging!")
kwargs = self.model_call_details kwargs = self.model_call_details
if self.stream: if self.stream:
if "complete_streaming_response" not in kwargs: if "async_complete_streaming_response" not in kwargs:
print_verbose( print_verbose(
f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. Kwargs={kwargs}\n\n"
) )
pass pass
else: else:
print_verbose( print_verbose(
"async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache"
) )
result = kwargs["complete_streaming_response"] result = kwargs["async_complete_streaming_response"]
# only add to cache once we have a complete streaming response # only add to cache once we have a complete streaming response
litellm.cache.add_cache(result, **kwargs) litellm.cache.add_cache(result, **kwargs)
if isinstance(callback, CustomLogger): # custom logger class if isinstance(callback, CustomLogger): # custom logger class
print_verbose( print_verbose(
f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}" f"Running Async success callback: {callback}; self.stream: {self.stream}; async_complete_streaming_response: {self.model_call_details.get('async_complete_streaming_response', None)} result={result}"
) )
if self.stream == True: if self.stream == True:
if "complete_streaming_response" in self.model_call_details: if (
"async_complete_streaming_response"
in self.model_call_details
):
await callback.async_log_success_event( await callback.async_log_success_event(
kwargs=self.model_call_details, kwargs=self.model_call_details,
response_obj=self.model_call_details[ response_obj=self.model_call_details[
"complete_streaming_response" "async_complete_streaming_response"
], ],
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
@ -1726,14 +1735,18 @@ class Logging:
) )
if callable(callback): # custom logger functions if callable(callback): # custom logger functions
print_verbose( print_verbose(
f"Making async function logging call - {self.model_call_details}" f"Making async function logging call for {callback}, result={result} - {self.model_call_details}"
) )
if self.stream: if self.stream:
if "complete_streaming_response" in self.model_call_details: if (
"async_complete_streaming_response"
in self.model_call_details
):
await customLogger.async_log_event( await customLogger.async_log_event(
kwargs=self.model_call_details, kwargs=self.model_call_details,
response_obj=self.model_call_details[ response_obj=self.model_call_details[
"complete_streaming_response" "async_complete_streaming_response"
], ],
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
@ -1754,14 +1767,17 @@ class Logging:
if dynamoLogger is None: if dynamoLogger is None:
dynamoLogger = DyanmoDBLogger() dynamoLogger = DyanmoDBLogger()
if self.stream: if self.stream:
if "complete_streaming_response" in self.model_call_details: if (
"async_complete_streaming_response"
in self.model_call_details
):
print_verbose( print_verbose(
"DynamoDB Logger: Got Stream Event - Completed Stream Response" "DynamoDB Logger: Got Stream Event - Completed Stream Response"
) )
await dynamoLogger._async_log_event( await dynamoLogger._async_log_event(
kwargs=self.model_call_details, kwargs=self.model_call_details,
response_obj=self.model_call_details[ response_obj=self.model_call_details[
"complete_streaming_response" "async_complete_streaming_response"
], ],
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
@ -3715,6 +3731,54 @@ def completion_cost(
raise e raise e
def supports_function_calling(model: str):
"""
Check if the given model supports function calling and return a boolean value.
Parameters:
model (str): The model name to be checked.
Returns:
bool: True if the model supports function calling, False otherwise.
Raises:
Exception: If the given model is not found in model_prices_and_context_window.json.
"""
if model in litellm.model_cost:
model_info = litellm.model_cost[model]
if model_info.get("supports_function_calling", False):
return True
return False
else:
raise Exception(
f"Model not in model_prices_and_context_window.json. You passed model={model}."
)
def supports_parallel_function_calling(model: str):
"""
Check if the given model supports parallel function calling and return True if it does, False otherwise.
Parameters:
model (str): The model to check for support of parallel function calling.
Returns:
bool: True if the model supports parallel function calling, False otherwise.
Raises:
Exception: If the model is not found in the model_cost dictionary.
"""
if model in litellm.model_cost:
model_info = litellm.model_cost[model]
if model_info.get("supports_parallel_function_calling", False):
return True
return False
else:
raise Exception(
f"Model not in model_prices_and_context_window.json. You passed model={model}."
)
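Since both helpers raise when a model is missing from `model_prices_and_context_window.json`, callers that probe arbitrary model names may want to guard the lookup. A small illustrative wrapper (the `my-internal-model` name is hypothetical):

```python
import litellm

def safe_supports_function_calling(model: str) -> bool:
    # supports_function_calling() raises for models not listed in
    # model_prices_and_context_window.json, so treat "unknown" as False.
    try:
        return litellm.supports_function_calling(model=model)
    except Exception:
        return False

print(safe_supports_function_calling("gpt-3.5-turbo"))      # True
print(safe_supports_function_calling("my-internal-model"))  # False - not in the map
```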
####### HELPER FUNCTIONS ################ ####### HELPER FUNCTIONS ################
def register_model(model_cost: Union[str, dict]): def register_model(model_cost: Union[str, dict]):
""" """
@ -4043,6 +4107,7 @@ def get_optional_params(
and custom_llm_provider != "vertex_ai" and custom_llm_provider != "vertex_ai"
and custom_llm_provider != "anyscale" and custom_llm_provider != "anyscale"
and custom_llm_provider != "together_ai" and custom_llm_provider != "together_ai"
and custom_llm_provider != "mistral"
): ):
if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat": if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
# ollama actually supports json output # ollama actually supports json output
@ -4713,7 +4778,14 @@ def get_optional_params(
if max_tokens: if max_tokens:
optional_params["max_tokens"] = max_tokens optional_params["max_tokens"] = max_tokens
elif custom_llm_provider == "mistral": elif custom_llm_provider == "mistral":
supported_params = ["temperature", "top_p", "stream", "max_tokens"] supported_params = [
"temperature",
"top_p",
"stream",
"max_tokens",
"tools",
"tool_choice",
]
_check_valid_arg(supported_params=supported_params) _check_valid_arg(supported_params=supported_params)
if temperature is not None: if temperature is not None:
optional_params["temperature"] = temperature optional_params["temperature"] = temperature
@ -4723,6 +4795,10 @@ def get_optional_params(
optional_params["stream"] = stream optional_params["stream"] = stream
if max_tokens is not None: if max_tokens is not None:
optional_params["max_tokens"] = max_tokens optional_params["max_tokens"] = max_tokens
if tools is not None:
optional_params["tools"] = tools
if tool_choice is not None:
optional_params["tool_choice"] = tool_choice
# check safe_mode, random_seed: https://docs.mistral.ai/api/#operation/createChatCompletion # check safe_mode, random_seed: https://docs.mistral.ai/api/#operation/createChatCompletion
safe_mode = passed_params.pop("safe_mode", None) safe_mode = passed_params.pop("safe_mode", None)
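With `tools` and `tool_choice` now passed through for Mistral, a request like the following should reach the provider with those params intact. This is an illustrative call only: the tool schema is made up and `MISTRAL_API_KEY` is assumed to be set in the environment:

```python
import litellm

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }
]

response = litellm.completion(
    model="mistral/mistral-large-latest",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto",
)
print(response.choices[0].message)
```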
@ -6947,7 +7023,7 @@ def exception_type(
if "500 An internal error has occurred." in error_str: if "500 An internal error has occurred." in error_str:
exception_mapping_worked = True exception_mapping_worked = True
raise APIError( raise APIError(
status_code=original_exception.status_code, status_code=getattr(original_exception, "status_code", 500),
message=f"PalmException - {original_exception.message}", message=f"PalmException - {original_exception.message}",
llm_provider="palm", llm_provider="palm",
model=model, model=model,
@ -8730,7 +8806,7 @@ class CustomStreamWrapper:
or original_chunk.choices[0].delta.tool_calls is not None or original_chunk.choices[0].delta.tool_calls is not None
): ):
try: try:
delta = dict(original_chunk.choices[0].delta) delta = original_chunk.choices[0].delta
model_response.system_fingerprint = ( model_response.system_fingerprint = (
original_chunk.system_fingerprint original_chunk.system_fingerprint
) )
@ -8765,7 +8841,9 @@ class CustomStreamWrapper:
is None is None
): ):
t.function.arguments = "" t.function.arguments = ""
model_response.choices[0].delta = Delta(**delta) _json_delta = delta.model_dump()
print_verbose(f"_json_delta: {_json_delta}")
model_response.choices[0].delta = Delta(**_json_delta)
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()
model_response.choices[0].delta = Delta() model_response.choices[0].delta = Delta()
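A plausible reading of the `dict(...)` -> `.model_dump()` swap above: `dict()` on a pydantic v2 model leaves nested models (like the tool-call `function`) as model objects, while `model_dump()` recursively converts them to plain dicts, which is what `Delta(**_json_delta)` expects. A standalone illustration with hypothetical models:

```python
from typing import Optional
from pydantic import BaseModel

class Function(BaseModel):
    name: Optional[str] = None
    arguments: str = ""

class ToolCall(BaseModel):
    index: int = 0
    function: Function = Function()

call = ToolCall(function=Function(name="get_weather", arguments="{}"))
print(dict(call))         # nested value stays a Function object
print(call.model_dump())  # nested value becomes {'name': ..., 'arguments': ...}
```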

View file

@ -6,7 +6,8 @@
"input_cost_per_token": 0.00003, "input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006, "output_cost_per_token": 0.00006,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
}, },
"gpt-4-turbo-preview": { "gpt-4-turbo-preview": {
"max_tokens": 8192, "max_tokens": 8192,
@ -15,7 +16,9 @@
"input_cost_per_token": 0.00001, "input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003, "output_cost_per_token": 0.00003,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
}, },
"gpt-4-0314": { "gpt-4-0314": {
"max_tokens": 8192, "max_tokens": 8192,
@ -33,7 +36,8 @@
"input_cost_per_token": 0.00003, "input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006, "output_cost_per_token": 0.00006,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
}, },
"gpt-4-32k": { "gpt-4-32k": {
"max_tokens": 32768, "max_tokens": 32768,
@ -69,7 +73,9 @@
"input_cost_per_token": 0.00001, "input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003, "output_cost_per_token": 0.00003,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
}, },
"gpt-4-0125-preview": { "gpt-4-0125-preview": {
"max_tokens": 128000, "max_tokens": 128000,
@ -78,7 +84,9 @@
"input_cost_per_token": 0.00001, "input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003, "output_cost_per_token": 0.00003,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
}, },
"gpt-4-vision-preview": { "gpt-4-vision-preview": {
"max_tokens": 128000, "max_tokens": 128000,
@ -105,7 +113,8 @@
"input_cost_per_token": 0.0000015, "input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002, "output_cost_per_token": 0.000002,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
}, },
"gpt-3.5-turbo-0301": { "gpt-3.5-turbo-0301": {
"max_tokens": 4097, "max_tokens": 4097,
@ -123,7 +132,8 @@
"input_cost_per_token": 0.0000015, "input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002, "output_cost_per_token": 0.000002,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
}, },
"gpt-3.5-turbo-1106": { "gpt-3.5-turbo-1106": {
"max_tokens": 16385, "max_tokens": 16385,
@ -132,7 +142,9 @@
"input_cost_per_token": 0.0000010, "input_cost_per_token": 0.0000010,
"output_cost_per_token": 0.0000020, "output_cost_per_token": 0.0000020,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
}, },
"gpt-3.5-turbo-0125": { "gpt-3.5-turbo-0125": {
"max_tokens": 16385, "max_tokens": 16385,
@ -141,7 +153,9 @@
"input_cost_per_token": 0.0000005, "input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000015, "output_cost_per_token": 0.0000015,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
}, },
"gpt-3.5-turbo-16k": { "gpt-3.5-turbo-16k": {
"max_tokens": 16385, "max_tokens": 16385,
@ -286,7 +300,9 @@
"input_cost_per_token": 0.00001, "input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003, "output_cost_per_token": 0.00003,
"litellm_provider": "azure", "litellm_provider": "azure",
"mode": "chat" "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
}, },
"azure/gpt-4-1106-preview": { "azure/gpt-4-1106-preview": {
"max_tokens": 128000, "max_tokens": 128000,
@ -295,7 +311,9 @@
"input_cost_per_token": 0.00001, "input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003, "output_cost_per_token": 0.00003,
"litellm_provider": "azure", "litellm_provider": "azure",
"mode": "chat" "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
}, },
"azure/gpt-4-0613": { "azure/gpt-4-0613": {
"max_tokens": 8192, "max_tokens": 8192,
@ -304,7 +322,8 @@
"input_cost_per_token": 0.00003, "input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006, "output_cost_per_token": 0.00006,
"litellm_provider": "azure", "litellm_provider": "azure",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
}, },
"azure/gpt-4-32k-0613": { "azure/gpt-4-32k-0613": {
"max_tokens": 32768, "max_tokens": 32768,
@ -331,7 +350,8 @@
"input_cost_per_token": 0.00003, "input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006, "output_cost_per_token": 0.00006,
"litellm_provider": "azure", "litellm_provider": "azure",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
}, },
"azure/gpt-4-turbo": { "azure/gpt-4-turbo": {
"max_tokens": 128000, "max_tokens": 128000,
@ -340,7 +360,9 @@
"input_cost_per_token": 0.00001, "input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003, "output_cost_per_token": 0.00003,
"litellm_provider": "azure", "litellm_provider": "azure",
"mode": "chat" "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
}, },
"azure/gpt-4-turbo-vision-preview": { "azure/gpt-4-turbo-vision-preview": {
"max_tokens": 128000, "max_tokens": 128000,
@ -358,7 +380,8 @@
"input_cost_per_token": 0.000003, "input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004, "output_cost_per_token": 0.000004,
"litellm_provider": "azure", "litellm_provider": "azure",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
}, },
"azure/gpt-35-turbo-1106": { "azure/gpt-35-turbo-1106": {
"max_tokens": 16384, "max_tokens": 16384,
@ -367,7 +390,20 @@
"input_cost_per_token": 0.0000015, "input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002, "output_cost_per_token": 0.000002,
"litellm_provider": "azure", "litellm_provider": "azure",
"mode": "chat" "mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-35-turbo-0125": {
"max_tokens": 16384,
"max_input_tokens": 16384,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000015,
"litellm_provider": "azure",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
}, },
"azure/gpt-35-turbo-16k": { "azure/gpt-35-turbo-16k": {
"max_tokens": 16385, "max_tokens": 16385,
@ -385,7 +421,8 @@
"input_cost_per_token": 0.0000015, "input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002, "output_cost_per_token": 0.000002,
"litellm_provider": "azure", "litellm_provider": "azure",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
}, },
"azure/ada": { "azure/ada": {
"max_tokens": 8191, "max_tokens": 8191,
@ -514,11 +551,12 @@
"mode": "chat" "mode": "chat"
}, },
"mistral/mistral-large-latest": { "mistral/mistral-large-latest": {
"max_tokens": 8192, "max_tokens": 32000,
"input_cost_per_token": 0.000008, "input_cost_per_token": 0.000008,
"output_cost_per_token": 0.000024, "output_cost_per_token": 0.000024,
"litellm_provider": "mistral", "litellm_provider": "mistral",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
}, },
"mistral/mistral-embed": { "mistral/mistral-embed": {
"max_tokens": 8192, "max_tokens": 8192,
@ -676,7 +714,8 @@
"input_cost_per_token": 0.00000025, "input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000005,
"litellm_provider": "vertex_ai-language-models", "litellm_provider": "vertex_ai-language-models",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
}, },
"gemini-1.5-pro": { "gemini-1.5-pro": {
"max_tokens": 8192, "max_tokens": 8192,
@ -1738,6 +1777,23 @@
"output_cost_per_token": 0.0000009, "output_cost_per_token": 0.0000009,
"litellm_provider": "together_ai" "litellm_provider": "together_ai"
}, },
"together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
"input_cost_per_token": 0.0000006,
"output_cost_per_token": 0.0000006,
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"together_ai/togethercomputer/CodeLlama-34b-Instruct": {
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"ollama/llama2": { "ollama/llama2": {
"max_tokens": 4096, "max_tokens": 4096,
"input_cost_per_token": 0.0, "input_cost_per_token": 0.0,
@ -1990,7 +2046,16 @@
"input_cost_per_token": 0.00000015, "input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015, "output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale", "litellm_provider": "anyscale",
"mode": "chat" "mode": "chat",
"supports_function_calling": true
},
"anyscale/Mixtral-8x7B-Instruct-v0.1": {
"max_tokens": 16384,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat",
"supports_function_calling": true
}, },
"anyscale/HuggingFaceH4/zephyr-7b-beta": { "anyscale/HuggingFaceH4/zephyr-7b-beta": {
"max_tokens": 16384, "max_tokens": 16384,

View file

@ -40,6 +40,8 @@ litellm_settings:
budget_duration: 30d budget_duration: 30d
general_settings: general_settings:
master_key: sk-1234 # [OPTIONAL] Only use this if you want to require all calls to contain this key (Authorization: Bearer sk-1234) master_key: sk-1234 # [OPTIONAL] Only use this if you want to require all calls to contain this key (Authorization: Bearer sk-1234)
proxy_budget_rescheduler_min_time: 30
proxy_budget_rescheduler_max_time: 60
# database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy # database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy
environment_variables: environment_variables:

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "litellm" name = "litellm"
version = "1.27.12" version = "1.28.0"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.commitizen] [tool.commitizen]
version = "1.27.12" version = "1.28.0"
version_files = [ version_files = [
"pyproject.toml:^version" "pyproject.toml:^version"
] ]

View file

@ -10,6 +10,7 @@ gunicorn==21.2.0 # server dep
boto3==1.34.34 # aws bedrock/sagemaker calls boto3==1.34.34 # aws bedrock/sagemaker calls
redis==5.0.0 # caching redis==5.0.0 # caching
numpy==1.24.3 # semantic caching numpy==1.24.3 # semantic caching
pandas==2.1.1 # for viewing clickhouse spend analytics
prisma==0.11.0 # for db prisma==0.11.0 # for db
mangum==0.17.0 # for aws lambda functions mangum==0.17.0 # for aws lambda functions
google-generativeai==0.3.2 # for vertex ai calls google-generativeai==0.3.2 # for vertex ai calls

View file

@ -449,7 +449,7 @@ async def test_key_with_budgets():
reset_at_init_value = key_info["info"]["budget_reset_at"] reset_at_init_value = key_info["info"]["budget_reset_at"]
reset_at_new_value = None reset_at_new_value = None
i = 0 i = 0
await asyncio.sleep(610) await asyncio.sleep(120)
while i < 3: while i < 3:
key_info = await get_key_info(session=session, get_key=key, call_key=key) key_info = await get_key_info(session=session, get_key=key, call_key=key)
reset_at_new_value = key_info["info"]["budget_reset_at"] reset_at_new_value = key_info["info"]["budget_reset_at"]
@ -490,6 +490,7 @@ async def test_key_crossing_budget():
assert "ExceededTokenBudget: Current spend for token:" in str(e) assert "ExceededTokenBudget: Current spend for token:" in str(e)
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_key_info_spend_values_sagemaker(): async def test_key_info_spend_values_sagemaker():
""" """

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a697f24d60c9c262.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a697f24d60c9c262.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/6920a121699cde9c.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[24143,[\"303\",\"static/chunks/303-d80f23087a9e6aec.js\",\"931\",\"static/chunks/app/page-d4fe4a48cbd3572c.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/6920a121699cde9c.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"eSwVwl_InIrhYtCAqDMKF\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[30280,[\"303\",\"static/chunks/303-d80f23087a9e6aec.js\",\"931\",\"static/chunks/app/page-8f65fc157f538dff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kyOCJPBB9pyUfbMKCAXr-\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin 
UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[24143,["303","static/chunks/303-d80f23087a9e6aec.js","931","static/chunks/app/page-d4fe4a48cbd3572c.js"],""] 3:I[30280,["303","static/chunks/303-d80f23087a9e6aec.js","931","static/chunks/app/page-8f65fc157f538dff.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["eSwVwl_InIrhYtCAqDMKF",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/6920a121699cde9c.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["kyOCJPBB9pyUfbMKCAXr-",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a40ad0909dd7838e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -6,6 +6,7 @@ import UserDashboard from "../components/user_dashboard";
import ModelDashboard from "@/components/model_dashboard"; import ModelDashboard from "@/components/model_dashboard";
import ViewUserDashboard from "@/components/view_users"; import ViewUserDashboard from "@/components/view_users";
import Teams from "@/components/teams"; import Teams from "@/components/teams";
import AdminPanel from "@/components/admins";
import ChatUI from "@/components/chat_ui"; import ChatUI from "@/components/chat_ui";
import Sidebar from "../components/leftnav"; import Sidebar from "../components/leftnav";
import Usage from "../components/usage"; import Usage from "../components/usage";
@ -73,6 +74,10 @@ const CreateKeyPage = () => {
return "App Owner"; return "App Owner";
case "app_admin": case "app_admin":
return "Admin"; return "Admin";
case "proxy_admin":
return "Admin";
case "proxy_admin_viewer":
return "Admin Viewer";
case "app_user": case "app_user":
return "App User"; return "App User";
default: default:
@ -133,6 +138,12 @@ const CreateKeyPage = () => {
searchParams={searchParams} searchParams={searchParams}
accessToken={accessToken} accessToken={accessToken}
/> />
) : page == "admin-panel" ? (
<AdminPanel
setTeams={setTeams}
searchParams={searchParams}
accessToken={accessToken}
/>
) : ( ) : (
<Usage <Usage
userID={userID} userID={userID}

View file

@ -0,0 +1,228 @@
/**
* Allow proxy admin to add other people to view global spend
* Use this to avoid sharing master key with others
*/
import React, { useState, useEffect } from "react";
import { Typography } from "antd";
import {
Button as Button2,
Modal,
Form,
Input,
Select as Select2,
InputNumber,
message,
} from "antd";
import { Select, SelectItem } from "@tremor/react";
import {
Table,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Card,
Icon,
Button,
Col,
Text,
Grid,
} from "@tremor/react";
import { CogIcon } from "@heroicons/react/outline";
interface AdminPanelProps {
searchParams: any;
accessToken: string | null;
setTeams: React.Dispatch<React.SetStateAction<Object[] | null>>;
}
import {
userUpdateUserCall,
Member,
userGetAllUsersCall,
User,
} from "./networking";
const AdminPanel: React.FC<AdminPanelProps> = ({
searchParams,
accessToken,
}) => {
const [form] = Form.useForm();
const [memberForm] = Form.useForm();
const { Title, Paragraph } = Typography;
const [value, setValue] = useState("");
const [admins, setAdmins] = useState<null | any[]>(null);
const [isAddMemberModalVisible, setIsAddMemberModalVisible] = useState(false);
useEffect(() => {
// Fetch proxy admins and admin viewers and populate the table
const fetchProxyAdminInfo = async () => {
if (accessToken != null) {
const combinedList: any[] = [];
const proxyViewers = await userGetAllUsersCall(
accessToken,
"proxy_admin_viewer"
);
proxyViewers.forEach((viewer: User) => {
combinedList.push({
user_role: viewer.user_role,
user_id: viewer.user_id,
user_email: viewer.user_email,
});
});
console.log(`proxy viewers: ${proxyViewers}`);
const proxyAdmins = await userGetAllUsersCall(
accessToken,
"proxy_admin"
);
proxyAdmins.forEach((admins: User) => {
combinedList.push({
user_role: admins.user_role,
user_id: admins.user_id,
user_email: admins.user_email,
});
});
console.log(`proxy admins: ${proxyAdmins}`);
console.log(`combinedList: ${combinedList}`);
setAdmins(combinedList);
}
};
fetchProxyAdminInfo();
}, [accessToken]);
const handleMemberOk = () => {
setIsAddMemberModalVisible(false);
memberForm.resetFields();
};
const handleMemberCancel = () => {
setIsAddMemberModalVisible(false);
memberForm.resetFields();
};
const handleMemberCreate = async (formValues: Record<string, any>) => {
try {
if (accessToken != null && admins != null) {
message.info("Making API Call");
const user_role: Member = {
role: "user",
user_email: formValues.user_email,
user_id: formValues.user_id,
};
const response: any = await userUpdateUserCall(accessToken, formValues);
console.log(`response for team create call: ${response}`);
// Check if the user already exists in the list and update or add accordingly
const foundIndex = admins.findIndex((user) => {
console.log(
`user.user_id=${user.user_id}; response.user_id=${response.user_id}`
);
return user.user_id === response.user_id;
});
console.log(`foundIndex: ${foundIndex}`);
if (foundIndex == -1) {
console.log(`updates admin with new user`);
admins.push(response);
// User not found in the existing list, so add them
setAdmins(admins); // Set the new state
}
setIsAddMemberModalVisible(false);
}
} catch (error) {
console.error("Error creating the key:", error);
}
};
console.log(`admins: ${admins?.length}`);
return (
<div className="w-full m-2">
<Title level={4}>Proxy Admins</Title>
<Paragraph>
Add other people to just view global spend. They cannot create teams or
grant users access to new models.
</Paragraph>
<Grid numItems={1} className="gap-2 p-0 w-full">
<Col numColSpan={1}>
<Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]">
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>Member Name</TableHeaderCell>
<TableHeaderCell>Role</TableHeaderCell>
<TableHeaderCell>Action</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{admins
? admins.map((member: any, index: number) => (
<TableRow key={index}>
<TableCell>
{member["user_email"]
? member["user_email"]
: member["user_id"]
? member["user_id"]
: null}
</TableCell>
<TableCell>{member["user_role"]}</TableCell>
<TableCell>
<Icon icon={CogIcon} size="sm" />
</TableCell>
</TableRow>
))
: null}
</TableBody>
</Table>
</Card>
</Col>
<Col numColSpan={1}>
<Button
className="mx-auto mb-5"
onClick={() => setIsAddMemberModalVisible(true)}
>
+ Add viewer
</Button>
<Modal
title="Add viewer"
visible={isAddMemberModalVisible}
width={800}
footer={null}
onOk={handleMemberOk}
onCancel={handleMemberCancel}
>
<Form
form={form}
onFinish={handleMemberCreate}
labelCol={{ span: 8 }}
wrapperCol={{ span: 16 }}
labelAlign="left"
>
<>
<Form.Item label="Email" name="user_email" className="mb-4">
<Input
name="user_email"
className="px-3 py-2 border rounded-md w-full"
/>
</Form.Item>
<div className="text-center mb-4">OR</div>
<Form.Item label="User ID" name="user_id" className="mb-4">
<Input
name="user_id"
className="px-3 py-2 border rounded-md w-full"
/>
</Form.Item>
</>
<div style={{ textAlign: "right", marginTop: "10px" }}>
<Button2 htmlType="submit">Add member</Button2>
</div>
</Form>
</Modal>
</Col>
</Grid>
</div>
);
};
export default AdminPanel;

View file

@ -46,6 +46,11 @@ const Sidebar: React.FC<SidebarProps> = ({
Teams
</Menu.Item>
) : null}
{userRole == "Admin" ? (
<Menu.Item key="7" onClick={() => setPage("admin-panel")}>
Admin
</Menu.Item>
) : null}
</Menu>
</Sider>
</Layout>

View file

@ -313,6 +313,7 @@ export const userSpendLogsCall = async (
endTime: String
) => {
try {
console.log(`user role in spend logs call: ${userRole}`);
let url = proxyBaseUrl ? `${proxyBaseUrl}/spend/logs` : `/spend/logs`;
if (userRole == "App Owner") {
url = `${url}/?user_id=${userID}&start_date=${startTime}&end_date=${endTime}`;
@ -343,6 +344,96 @@ export const userSpendLogsCall = async (
}
};
export const adminSpendLogsCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/spend/logs`
: `/global/spend/logs`;
message.info("Making spend logs request");
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Spend Logs received");
return data;
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
};
export const adminTopKeysCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/spend/keys?limit=5`
: `/global/spend/keys?limit=5`;
message.info("Making spend keys request");
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Spend Logs received");
return data;
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
};
export const adminTopModelsCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/spend/models?limit=5`
: `/global/spend/models?limit=5`;
message.info("Making spend models request");
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Spend Logs received");
return data;
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
};
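
The `adminSpendLogsCall`, `adminTopKeysCall`, and `adminTopModelsCall` helpers all issue the same bearer-token GET against the proxy's `/global/spend/*` routes. A minimal sketch of how they could be combined when loading an Admin dashboard; the `loadAdminUsage` name and the exact response field shapes are assumptions inferred from how the Usage page consumes these calls further below:

```typescript
import {
  adminSpendLogsCall,
  adminTopKeysCall,
  adminTopModelsCall,
} from "./networking";

// Hypothetical helper: load everything an Admin usage dashboard needs.
export const loadAdminUsage = async (accessToken: string) => {
  // Global daily spend rows for the spend chart
  const overallSpend = await adminSpendLogsCall(accessToken);

  // Top 5 keys, trimmed to a short display label (field names assumed)
  const topKeys = (await adminTopKeysCall(accessToken)).map((k: any) => ({
    key: (k["key_name"] || k["key_alias"] || k["api_key"]).substring(0, 7),
    spend: k["total_spend"],
  }));

  // Top 5 models by spend
  const topModels = await adminTopModelsCall(accessToken);

  return { overallSpend, topKeys, topModels };
};
```
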
export const keyInfoCall = async (accessToken: String, keys: String[]) => {
try {
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
@ -468,6 +559,46 @@ export const userGetRequesedtModelsCall = async (accessToken: String) => {
}
};
export interface User {
user_role: string;
user_id: string;
user_email: string;
[key: string]: string; // Include any other potential keys in the dictionary
}
export const userGetAllUsersCall = async (
accessToken: String,
role: String
) => {
try {
const url = proxyBaseUrl
? `${proxyBaseUrl}/user/get_users?role=${role}`
: `/user/get_users?role=${role}`;
console.log("in userGetAllUsersCall:", url);
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error("Failed to delete key: " + errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Got all users");
return data;
// Handle success - you might want to update some state or UI based on the fetched users
} catch (error) {
console.error("Failed to get users:", error);
throw error;
}
};
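
A minimal sketch of pulling both proxy admin roles with `userGetAllUsersCall` and merging them into one list, mirroring what the AdminPanel component does on mount; the `listProxyAdmins` helper is hypothetical and assumes the endpoint returns an array of `User` objects as the interface above suggests:

```typescript
import { userGetAllUsersCall, User } from "./networking";

// Hypothetical helper: list every proxy admin and admin viewer in one array.
const listProxyAdmins = async (accessToken: string): Promise<User[]> => {
  const combined: User[] = [];
  for (const role of ["proxy_admin", "proxy_admin_viewer"]) {
    const users: User[] = await userGetAllUsersCall(accessToken, role);
    combined.push(...users);
  }
  return combined;
};
```
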
export const teamCreateCall = async (
accessToken: string,
formValues: Record<string, any> // Assuming formValues is an object
@ -549,3 +680,41 @@ export const teamMemberAddCall = async (
throw error;
}
};
export const userUpdateUserCall = async (
accessToken: string,
formValues: any // Assuming formValues is an object
) => {
try {
console.log("Form Values in userUpdateUserCall:", formValues); // Log the form values before making the API call
const url = proxyBaseUrl ? `${proxyBaseUrl}/user/update` : `/user/update`;
const response = await fetch(url, {
method: "POST",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
user_role: "proxy_admin_viewer",
...formValues, // Include formValues in the request body
}),
});
if (!response.ok) {
const errorData = await response.text();
message.error("Failed to create key: " + errorData);
console.error("Error response from the server:", errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log("API Response:", data);
message.success("User role updated");
return data;
// Handle success - you might want to update some state or UI based on the updated user
} catch (error) {
console.error("Failed to update user:", error);
throw error;
}
};
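
Because the request body spreads `formValues` after the hard-coded default, callers get `proxy_admin_viewer` unless they pass their own `user_role`. A minimal sketch of granting view-only access by email; the `addViewer` helper is hypothetical:

```typescript
import { userUpdateUserCall } from "./networking";

// Hypothetical helper: grant view-only access by email. Only one of
// user_email / user_id is needed, matching the "Add viewer" form above.
const addViewer = async (accessToken: string, email: string) => {
  // user_role defaults to "proxy_admin_viewer" inside userUpdateUserCall
  return await userUpdateUserCall(accessToken, { user_email: email });
};
```
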

View file

@ -2,7 +2,13 @@ import { BarChart, Card, Title } from "@tremor/react";
import React, { useState, useEffect } from "react";
import { Grid, Col, Text, LineChart } from "@tremor/react";
import {
userSpendLogsCall,
keyInfoCall,
adminSpendLogsCall,
adminTopKeysCall,
adminTopModelsCall,
} from "./networking";
import { start } from "repl";
interface UsagePageProps {
@ -164,29 +170,61 @@ const UsagePage: React.FC<UsagePageProps> = ({
if (accessToken && token && userRole && userID) {
const fetchData = async () => {
try {
/**
 * If user is Admin - query the global views endpoints
 * If user is App Owner - use the normal spend logs call
 */
console.log(`user role: ${userRole}`);
if (userRole == "Admin") {
const overall_spend = await adminSpendLogsCall(accessToken);
setKeySpendData(overall_spend);
const top_keys = await adminTopKeysCall(accessToken);
const filtered_keys = top_keys.map((k: any) => ({
key: (k["key_name"] || k["key_alias"] || k["api_key"]).substring(
0,
7
),
spend: k["total_spend"],
}));
setTopKeys(filtered_keys);
const top_models = await adminTopModelsCall(accessToken);
} else if (userRole == "App Owner") {
await userSpendLogsCall(
accessToken,
token,
userRole,
userID,
startTime,
endTime
).then(async (response) => {
console.log("result from spend logs call", response);
if ("daily_spend" in response) {
// this is from clickhouse analytics
//
let daily_spend = response["daily_spend"];
console.log("daily spend", daily_spend);
setKeySpendData(daily_spend);
let topApiKeys = response.top_api_keys;
setTopKeys(topApiKeys);
} else {
const topKeysResponse = await keyInfoCall(
accessToken,
getTopKeys(response)
);
const filtered_keys = topKeysResponse["info"].map((k: any) => ({
key: (
k["key_name"] ||
k["key_alias"] ||
k["token"]
).substring(0, 7),
spend: k["spend"],
}));
setTopKeys(filtered_keys);
setTopUsers(getTopUsers(response));
setKeySpendData(response);
}
});
}
} catch (error) {
console.error("There was an error fetching the data", error);
// Optionally, update your UI to reflect the error state here as well
@ -210,7 +248,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
valueFormatter={valueFormatter}
yAxisWidth={100}
tickGap={5}
// customTooltip={customTooltip}
/>
</Card>
</Col>