Merge branch 'main' of github.com:lunary-ai/litellm

Vince Loewe 2024-02-28 22:18:43 -08:00
commit ab415d5165
68 changed files with 2676 additions and 1126 deletions

View file

@ -130,6 +130,7 @@ jobs:
pip install "langfuse>=2.0.0"
pip install numpydoc
pip install prisma
pip install fastapi
pip install "httpx==0.24.1"
pip install "gunicorn==21.2.0"
pip install "anyio==3.7.1"

View file

@ -1,18 +1,25 @@
# Function Calling
Function calling is supported by the following models on OpenAI and Azure OpenAI:
- gpt-4
- gpt-4-1106-preview
- gpt-4-0613
- gpt-3.5-turbo
- gpt-3.5-turbo-1106
- gpt-3.5-turbo-0613
- Non-OpenAI LLMs (LiteLLM adds the function call to the prompt for these LLMs)

In addition, parallel function calling is supported on the following models:
- gpt-4-1106-preview
- gpt-3.5-turbo-1106

## Checking if a model supports function calling
Use `litellm.supports_function_calling(model="")` -> returns `True` if the model supports function calling, `False` if not
```python
assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
assert litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
assert litellm.supports_function_calling(model="palm/chat-bison") == False
assert litellm.supports_function_calling(model="ollama/llama2") == False
```
## Checking if a model supports parallel function calling
Use `litellm.supports_parallel_function_calling(model="")` -> returns `True` if the model supports parallel function calling, `False` if not
```python
assert litellm.supports_parallel_function_calling(model="gpt-4-turbo-preview") == True
assert litellm.supports_parallel_function_calling(model="gpt-4") == False
```
## Parallel Function Calling
Parallel function calling is the model's ability to perform multiple function calls together, allowing the effects and results of these calls to be resolved in parallel.
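As a minimal sketch (the `get_current_weather` tool and its schema are illustrative placeholders, not part of LiteLLM):
```python
from litellm import completion

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",  # illustrative tool name
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name, e.g. San Francisco"}
                },
                "required": ["location"],
            },
        },
    }
]

response = completion(
    model="gpt-3.5-turbo-1106",  # supports parallel function calling (see list above)
    messages=[{"role": "user", "content": "What's the weather in San Francisco and Tokyo?"}],
    tools=tools,
    tool_choice="auto",
)
# a single assistant message may contain multiple tool_calls - one per location
print(response.choices[0].message.tool_calls)
```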

View file

@ -291,7 +291,6 @@ Here's an example of using a bedrock model with LiteLLM
| Anthropic Claude-V2.1 | `completion(model='bedrock/anthropic.claude-v2:1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Anthropic Claude-V2 | `completion(model='bedrock/anthropic.claude-v2', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Anthropic Claude-Instant V1 | `completion(model='bedrock/anthropic.claude-instant-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Amazon Titan Lite | `completion(model='bedrock/amazon.titan-text-lite-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| Amazon Titan Express | `completion(model='bedrock/amazon.titan-text-express-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| Cohere Command | `completion(model='bedrock/cohere.command-text-v14', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |

View file

@ -0,0 +1,44 @@
# 💸 GET Daily Spend, Usage Metrics
## Request Format
```shell
curl -X GET "http://0.0.0.0:4000/daily_metrics" -H "Authorization: Bearer sk-1234"
```
## Response Format
```json
{
    "daily_spend": [
        {
            "daily_spend": 7.9261938052047e+16,
            "day": "2024-02-01T00:00:00",
            "spend_per_model": {"azure/gpt-4": 7.9261938052047e+16},
            "spend_per_api_key": {
                "76": 914495704992000.0,
                "12": 905726697912000.0,
                "71": 866312628003000.0,
                "28": 865461799332000.0,
                "13": 859151538396000.0
            }
        },
        {
            "daily_spend": 7.938489251309491e+16,
            "day": "2024-02-02T00:00:00",
            "spend_per_model": {"gpt-3.5": 7.938489251309491e+16},
            "spend_per_api_key": {
                "91": 896805036036000.0,
                "78": 889692646082000.0,
                "49": 885386687861000.0,
                "28": 873869890984000.0,
                "56": 867398637692000.0
            }
        }
    ],
    "total_spend": 200,
    "top_models": {"gpt4": 0.2, "vertexai/gemini-pro": 10},
    "top_api_keys": {"899922": 0.9, "838hcjd999seerr88": 20}
}
```
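The same request from Python, as a sketch (assumes the proxy is running locally with the example master key above, and that the `requests` package is installed):
```python
import requests

response = requests.get(
    "http://0.0.0.0:4000/daily_metrics",
    headers={"Authorization": "Bearer sk-1234"},
)
metrics = response.json()
print(metrics["total_spend"])  # e.g. 200
print(metrics["top_models"])   # e.g. {"gpt4": 0.2, "vertexai/gemini-pro": 10}
```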

View file

@ -186,6 +186,20 @@ If you don't see all your keys this could be due to a cached token. So just re-l
:::
### Restrict UI Access
You can restrict UI access to admins only. This includes you (proxy_admin) and anyone you grant view-only access to (proxy_admin_viewer) for viewing global spend.
**Step 1. Set 'admin_only' access**
```yaml
general_settings:
  ui_access_mode: "admin_only"
```
**Step 2. Invite view-only users**
<Image img={require('../../img/admin_ui_viewer.png')} />
### Custom Branding Admin UI
Use your company's custom branding on the LiteLLM Admin UI.

Binary file not shown (new image: admin_ui_viewer.png, 131 KiB)

View file

@ -40,6 +40,7 @@ const sidebars = {
"proxy/virtual_keys",
"proxy/users",
"proxy/ui",
"proxy/metrics",
"proxy/model_management",
"proxy/health",
"proxy/debugging",

View file

@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
"log_count": num_rows,
}
return response_data
def _create_clickhouse_material_views(client=None, table_names=[]):
# Create Materialized Views if they don't exist
# Materialized Views send new inserted rows to the aggregate tables
verbose_logger.debug("Clickhouse: Creating Materialized Views")
if "daily_aggregated_spend_per_model_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
TO daily_aggregated_spend_per_model
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
model as model
FROM spend_logs
GROUP BY
day, model
"""
)
if "daily_aggregated_spend_per_api_key_mv" not in table_names:
verbose_logger.debug(
"Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
)
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
TO daily_aggregated_spend_per_api_key
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
api_key as api_key
FROM spend_logs
GROUP BY
day, api_key
"""
)
if "daily_aggregated_spend_per_user_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
TO daily_aggregated_spend_per_user
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
user as user
FROM spend_logs
GROUP BY
day, user
"""
)
if "daily_aggregated_spend_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
TO daily_aggregated_spend
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend
FROM spend_logs
GROUP BY
day
"""
)
def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
# Basic Logging works without this - this is only used for low latency reporting APIs
verbose_logger.debug("Clickhouse: Creating Aggregate Tables")
# Create Aggregate Tables if they don't exist
if "daily_aggregated_spend_per_model" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_model
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`model` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, model);
"""
)
if "daily_aggregated_spend_per_api_key" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_api_key
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`api_key` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, api_key);
"""
)
if "daily_aggregated_spend_per_user" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_user
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`user` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, user);
"""
)
if "daily_aggregated_spend" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
client.command(
"""
CREATE TABLE daily_aggregated_spend
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64)
)
ENGINE = SummingMergeTree()
ORDER BY (day);
"""
)
return
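# Reading the data back (sketch): the AggregateFunction state that the
# materialized views write into these tables is finalized with sumMerge at
# query time, e.g. (assuming a clickhouse_connect client):
#   client.query_df(
#       "SELECT day, sumMerge(DailySpend) AS daily_spend "
#       "FROM daily_aggregated_spend GROUP BY day ORDER BY day"
#   )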

View file

@ -549,6 +549,8 @@ from .utils import (
token_counter,
cost_per_token,
completion_cost,
supports_function_calling,
supports_parallel_function_calling,
get_litellm_params,
Logging,
acreate,

View file

@ -27,6 +27,151 @@ import litellm, uuid
from litellm._logging import print_verbose, verbose_logger
def create_client():
try:
import clickhouse_connect
port = os.getenv("CLICKHOUSE_PORT")
clickhouse_host = os.getenv("CLICKHOUSE_HOST")
if clickhouse_host is not None:
verbose_logger.debug("setting up clickhouse")
if port is not None and isinstance(port, str):
port = int(port)
client = clickhouse_connect.get_client(
host=os.getenv("CLICKHOUSE_HOST"),
port=port,
username=os.getenv("CLICKHOUSE_USERNAME"),
password=os.getenv("CLICKHOUSE_PASSWORD"),
)
return client
else:
raise Exception("Clickhouse: Clickhouse host not set")
except Exception as e:
raise ValueError(f"Clickhouse: {e}")
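# Example env for create_client() (sketch; 8123 is ClickHouse's default HTTP port):
#   CLICKHOUSE_HOST=localhost CLICKHOUSE_PORT=8123
#   CLICKHOUSE_USERNAME=default CLICKHOUSE_PASSWORD=...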
def build_daily_metrics():
click_house_client = create_client()
# get daily spend
daily_spend = click_house_client.query_df(
"""
SELECT sumMerge(DailySpend) as daily_spend, day FROM daily_aggregated_spend GROUP BY day
"""
)
# get daily spend per model
daily_spend_per_model = click_house_client.query_df(
"""
SELECT sumMerge(DailySpend) as daily_spend, day, model FROM daily_aggregated_spend_per_model GROUP BY day, model
"""
)
new_df = daily_spend_per_model.to_dict(orient="records")
import pandas as pd
df = pd.DataFrame(new_df)
# Group by 'day' and create a dictionary for each group
result_dict = {}
for day, group in df.groupby("day"):
models = group["model"].tolist()
spend = group["daily_spend"].tolist()
spend_per_model = {model: spend for model, spend in zip(models, spend)}
result_dict[day] = spend_per_model
# get daily spend per API key
daily_spend_per_api_key = click_house_client.query_df(
"""
SELECT
daily_spend,
day,
api_key
FROM (
SELECT
sumMerge(DailySpend) as daily_spend,
day,
api_key,
RANK() OVER (PARTITION BY day ORDER BY sumMerge(DailySpend) DESC) as spend_rank
FROM
daily_aggregated_spend_per_api_key
GROUP BY
day,
api_key
) AS ranked_api_keys
WHERE
spend_rank <= 5
AND day IS NOT NULL
ORDER BY
day,
daily_spend DESC
"""
)
new_df = daily_spend_per_api_key.to_dict(orient="records")
import pandas as pd
df = pd.DataFrame(new_df)
# Group by 'day' and create a dictionary for each group
api_key_result_dict = {}
for day, group in df.groupby("day"):
api_keys = group["api_key"].tolist()
spend = group["daily_spend"].tolist()
spend_per_api_key = {api_key: spend for api_key, spend in zip(api_keys, spend)}
api_key_result_dict[day] = spend_per_api_key
# Calculate total spend across all days
total_spend = daily_spend["daily_spend"].sum()
# Identify top models and top API keys with the highest spend across all days
top_models = {}
top_api_keys = {}
for day, spend_per_model in result_dict.items():
for model, model_spend in spend_per_model.items():
if model not in top_models or model_spend > top_models[model]:
top_models[model] = model_spend
for day, spend_per_api_key in api_key_result_dict.items():
for api_key, api_key_spend in spend_per_api_key.items():
if api_key not in top_api_keys or api_key_spend > top_api_keys[api_key]:
top_api_keys[api_key] = api_key_spend
# for each day in daily spend, look up the day in result_dict and api_key_result_dict
# Assuming daily_spend DataFrame has 'day' column
result = []
for index, row in daily_spend.iterrows():
day = row["day"]
data_day = row.to_dict()
# Look up in result_dict
if day in result_dict:
spend_per_model = result_dict[day]
# Assuming there is a column named 'model' in daily_spend
data_day["spend_per_model"] = spend_per_model # Assign 0 if model not found
# Look up in api_key_result_dict
if day in api_key_result_dict:
spend_per_api_key = api_key_result_dict[day]
# Assuming there is a column named 'api_key' in daily_spend
data_day["spend_per_api_key"] = spend_per_api_key
result.append(data_day)
data_to_return = {}
data_to_return["daily_spend"] = result
data_to_return["total_spend"] = total_spend
data_to_return["top_models"] = top_models
data_to_return["top_api_keys"] = top_api_keys
return data_to_return
# build_daily_metrics()
def _start_clickhouse():
import clickhouse_connect
@ -86,6 +231,14 @@ def _start_clickhouse():
response = client.query("DESCRIBE default.spend_logs")
verbose_logger.debug(f"spend logs schema ={response.result_rows}")
# RUN Enterprise Clickhouse Setup
# TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
_create_clickhouse_aggregate_tables(client=client, table_names=table_names)
_create_clickhouse_material_views(client=client, table_names=table_names)
class ClickhouseLogger:
# Class variables or attributes

View file

@ -278,7 +278,11 @@ def completion(
import google.auth
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
print_verbose(
f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
)
creds, _ = google.auth.default(quota_project_id=vertex_project)
print_verbose(f"VERTEX AI: creds={creds}")
vertexai.init(
project=vertex_project, location=vertex_location, credentials=creds
)

View file

@ -10,7 +10,6 @@
import os, openai, sys, json, inspect, uuid, datetime, threading
from typing import Any, Literal, Union
from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx
@ -1468,12 +1467,14 @@ def completion(
response = model_response
elif custom_llm_provider == "vertex_ai":
vertex_ai_project = (
optional_params.pop("vertex_ai_project", None)
optional_params.pop("vertex_project", None)
or optional_params.pop("vertex_ai_project", None)
or litellm.vertex_project
or get_secret("VERTEXAI_PROJECT")
)
vertex_ai_location = (
optional_params.pop("vertex_ai_location", None)
optional_params.pop("vertex_location", None)
or optional_params.pop("vertex_ai_location", None)
or litellm.vertex_location
or get_secret("VERTEXAI_LOCATION")
)
@ -2567,12 +2568,14 @@ def embedding(
)
elif custom_llm_provider == "vertex_ai":
vertex_ai_project = (
optional_params.pop("vertex_ai_project", None)
optional_params.pop("vertex_project", None)
or optional_params.pop("vertex_ai_project", None)
or litellm.vertex_project
or get_secret("VERTEXAI_PROJECT")
)
vertex_ai_location = (
optional_params.pop("vertex_ai_location", None)
optional_params.pop("vertex_location", None)
or optional_params.pop("vertex_ai_location", None)
or litellm.vertex_location
or get_secret("VERTEXAI_LOCATION")
)

View file

@ -6,7 +6,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-4-turbo-preview": {
"max_tokens": 8192,
@ -15,7 +16,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-0314": {
"max_tokens": 8192,
@ -33,7 +36,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-4-32k": {
"max_tokens": 32768,
@ -69,7 +73,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-0125-preview": {
"max_tokens": 128000,
@ -78,7 +84,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-vision-preview": {
"max_tokens": 128000,
@ -105,7 +113,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4097,
@ -123,7 +132,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-3.5-turbo-1106": {
"max_tokens": 16385,
@ -132,7 +142,9 @@
"input_cost_per_token": 0.0000010,
"output_cost_per_token": 0.0000020,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-3.5-turbo-0125": {
"max_tokens": 16385,
@ -141,7 +153,9 @@
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000015,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16385,
@ -286,7 +300,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-1106-preview": {
"max_tokens": 128000,
@ -295,7 +311,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-0613": {
"max_tokens": 8192,
@ -304,7 +322,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-4-32k-0613": {
"max_tokens": 32768,
@ -331,7 +350,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-4-turbo": {
"max_tokens": 128000,
@ -340,7 +360,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-turbo-vision-preview": {
"max_tokens": 128000,
@ -358,7 +380,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-35-turbo-1106": {
"max_tokens": 16384,
@ -367,7 +390,20 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-35-turbo-0125": {
"max_tokens": 16384,
"max_input_tokens": 16384,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000015,
"litellm_provider": "azure",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-35-turbo-16k": {
"max_tokens": 16385,
@ -385,7 +421,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/ada": {
"max_tokens": 8191,
@ -514,11 +551,12 @@
"mode": "chat"
},
"mistral/mistral-large-latest": {
"max_tokens": 8192,
"max_tokens": 32000,
"input_cost_per_token": 0.000008,
"output_cost_per_token": 0.000024,
"litellm_provider": "mistral",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"mistral/mistral-embed": {
"max_tokens": 8192,
@ -676,7 +714,8 @@
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gemini-1.5-pro": {
"max_tokens": 8192,
@ -1738,6 +1777,23 @@
"output_cost_per_token": 0.0000009,
"litellm_provider": "together_ai"
},
"together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
"input_cost_per_token": 0.0000006,
"output_cost_per_token": 0.0000006,
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"together_ai/togethercomputer/CodeLlama-34b-Instruct": {
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"ollama/llama2": {
"max_tokens": 4096,
"input_cost_per_token": 0.0,
@ -1990,7 +2046,16 @@
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"anyscale/Mixtral-8x7B-Instruct-v0.1": {
"max_tokens": 16384,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat",
"supports_function_calling": true
},
"anyscale/HuggingFaceH4/zephyr-7b-beta": {
"max_tokens": 16384,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/6920a121699cde9c.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/a40ad0909dd7838e.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[30280,[\"303\",\"static/chunks/303-d80f23087a9e6aec.js\",\"931\",\"static/chunks/app/page-8f65fc157f538dff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kyOCJPBB9pyUfbMKCAXr-\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[30280,["303","static/chunks/303-d80f23087a9e6aec.js","931","static/chunks/app/page-8f65fc157f538dff.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["eSwVwl_InIrhYtCAqDMKF",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/6920a121699cde9c.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["kyOCJPBB9pyUfbMKCAXr-",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a40ad0909dd7838e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -221,12 +221,19 @@ class NewUserResponse(GenerateKeyResponse):
class UpdateUserRequest(GenerateRequestBase):
# Note: the defaults of all Params here MUST BE NONE
# else they will get overwritten
user_id: Optional[str] = None
user_email: Optional[str] = None
spend: Optional[float] = None
metadata: Optional[dict] = None
user_role: Optional[str] = None
max_budget: Optional[float] = None
@root_validator(pre=True)
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
return values
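# Example (sketch) of the validator's behavior:
#   UpdateUserRequest(user_email="user@example.com")  # ok
#   UpdateUserRequest(spend=1.0)  # raises ValueError("Either user id or user email must be provided")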
class Member(LiteLLMBase):
role: Literal["admin", "user"]
@ -402,6 +409,9 @@ class ConfigGeneralSettings(LiteLLMBase):
None,
description="sends alerts if requests hang for 5min+",
)
ui_access_mode: Optional[Literal["admin_only", "all"]] = Field(
"all", description="Control access to the Proxy UI"
)
class ConfigYAML(LiteLLMBase):

View file

@ -0,0 +1,66 @@
from litellm.integrations.custom_logger import CustomLogger
import litellm
# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):
def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call") # noqa
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
print(f"Post-API Call") # noqa
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream") # noqa
def log_success_event(self, kwargs, response_obj, start_time, end_time):
print("On Success") # noqa
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Failure") # noqa
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"ishaan async_log_success_event") # noqa
# log: key, user, model, prompt, response, tokens, cost
# Access kwargs passed to litellm.completion()
model = kwargs.get("model", None)
messages = kwargs.get("messages", None)
user = kwargs.get("user", None)
# Access litellm_params passed to litellm.completion(), example access `metadata`
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get(
"metadata", {}
) # headers passed to LiteLLM proxy, can be found here
return
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
try:
print(f"On Async Failure !") # noqa
print("\nkwargs", kwargs) # noqa
# Access kwargs passed to litellm.completion()
model = kwargs.get("model", None)
messages = kwargs.get("messages", None)
user = kwargs.get("user", None)
# Access litellm_params passed to litellm.completion(), example access `metadata`
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get(
"metadata", {}
) # headers passed to LiteLLM proxy, can be found here
# Access Exceptions & Traceback
exception_event = kwargs.get("exception", None)
traceback_event = kwargs.get("traceback_exception", None)
# Calculate cost using litellm.completion_cost()
except Exception as e:
print(f"Exception: {e}") # noqa
proxy_handler_instance = MyCustomHandler()
# Set litellm.callbacks = [proxy_handler_instance] on the proxy

View file

@ -45,7 +45,7 @@ litellm_settings:
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
success_callback: ['langfuse']
# setting callback class
callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
general_settings:
master_key: sk-1234

View file

@ -239,6 +239,9 @@ health_check_interval = None
health_check_results = {}
queue: List = []
litellm_proxy_budget_name = "litellm-proxy-budget"
ui_access_mode: Literal["admin_only", "all"] = "all"
proxy_budget_rescheduler_min_time = 597
proxy_budget_rescheduler_max_time = 605
### INITIALIZE GLOBAL LOGGING OBJECT ###
proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
### REDIS QUEUE ###
@ -1406,7 +1409,7 @@ class ProxyConfig:
"""
Load config values into proxy global state
"""
global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time, ui_access_mode
# Load existing config
config = await self.get_config(config_file_path=config_file_path)
@ -1713,6 +1716,17 @@ class ProxyConfig:
)
## COST TRACKING ##
cost_tracking()
## ADMIN UI ACCESS ##
ui_access_mode = general_settings.get(
"ui_access_mode", "all"
) # can be either "admin_only" or "all"
## BUDGET RESCHEDULER ##
proxy_budget_rescheduler_min_time = general_settings.get(
"proxy_budget_rescheduler_min_time", proxy_budget_rescheduler_min_time
)
proxy_budget_rescheduler_max_time = general_settings.get(
"proxy_budget_rescheduler_max_time", proxy_budget_rescheduler_max_time
)
### BACKGROUND HEALTH CHECKS ###
# Enable background health checks
use_background_health_checks = general_settings.get(
@ -2115,10 +2129,9 @@ async def async_data_generator(response, user_api_key_dict):
try:
start_time = time.time()
async for chunk in response:
verbose_proxy_logger.debug(f"returned chunk: {chunk}")
assert isinstance(chunk, litellm.ModelResponse)
chunk = chunk.model_dump_json(exclude_none=True)
try:
yield f"data: {json.dumps(chunk.model_dump(exclude_none=True))}\n\n"
yield f"data: {chunk}\n\n"
except Exception as e:
yield f"data: {str(e)}\n\n"
@ -2197,7 +2210,7 @@ def parse_cache_control(cache_control):
@router.on_event("startup")
async def startup_event():
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
import json
### LOAD MASTER KEY ###
@ -2302,13 +2315,12 @@ async def startup_event():
### CHECK IF VIEW EXISTS ###
if prisma_client is not None:
create_view_response = await prisma_client.check_view_exists()
print(f"create_view_response: {create_view_response}") # noqa
### START BUDGET SCHEDULER ###
if prisma_client is not None:
scheduler = AsyncIOScheduler()
interval = random.randint(
proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
) # random interval, so multiple workers avoid resetting budget at the same time
scheduler.add_job(
reset_budget, "interval", seconds=interval, args=[prisma_client]
@ -3775,7 +3787,7 @@ async def view_spend_tags(
@router.get(
"/spend/logs",
tags=["budget & spend Tracking"],
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
responses={
200: {"model": List[LiteLLM_SpendLogs]},
@ -3834,13 +3846,55 @@ async def view_spend_logs(
# getting spend logs from clickhouse
from litellm.proxy.enterprise.utils import view_spend_logs_from_clickhouse
daily_metrics = await view_daily_metrics(
start_date=start_date,
end_date=end_date,
)
# get the top api keys across all daily_metrics
top_api_keys = {} # type: ignore
# make this compatible with the admin UI
for response in daily_metrics.get("daily_spend", {}):
response["startTime"] = response["day"]
response["spend"] = response["daily_spend"]
response["models"] = response["spend_per_model"]
response["users"] = {"ishaan": 0.0}
spend_per_api_key = response["spend_per_api_key"]
# insert spend_per_api_key key, values in response
for key, value in spend_per_api_key.items():
response[key] = value
top_api_keys[key] = top_api_keys.get(key, 0.0) + value
del response["day"]
del response["daily_spend"]
del response["spend_per_model"]
del response["spend_per_api_key"]
# get top 5 api keys
top_api_keys = sorted(top_api_keys.items(), key=lambda x: x[1], reverse=True) # type: ignore
top_api_keys = top_api_keys[:5] # type: ignore
top_api_keys = dict(top_api_keys) # type: ignore
"""
set it like this
{
"key" : key,
"spend:" : spend
}
"""
# we need this to show on the Admin UI
response_keys = []
for key in top_api_keys.items():
response_keys.append(
{
"key": key[0],
"spend": key[1],
}
)
daily_metrics["top_api_keys"] = response_keys
return daily_metrics
global prisma_client
try:
verbose_proxy_logger.debug("inside view_spend_logs")
@ -3993,6 +4047,142 @@ async def view_spend_logs(
)
@router.get(
"/global/spend/logs",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_logs():
"""
[BETA] This is a beta endpoint. It will change.
Use this to get global spend (spend per day for last 30d). Admin-only endpoint
More efficient implementation of /spend/logs, by creating a view over the spend logs table.
"""
global prisma_client
sql_query = """SELECT * FROM "MonthlyGlobalSpend";"""
response = await prisma_client.db.query_raw(query=sql_query)
return response
@router.get(
"/global/spend/keys",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_keys(
limit: int = fastapi.Query(
default=None,
description="Number of keys to get. Will return Top 'n' keys.",
)
):
"""
[BETA] This is a beta endpoint. It will change.
Use this to get the top 'n' keys with the highest spend, ordered by spend.
"""
global prisma_client
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
sql_query = f"""SELECT * FROM "Last30dKeysBySpend" LIMIT {limit};"""
response = await prisma_client.db.query_raw(query=sql_query)
return response
@router.get(
"/global/spend/models",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_models(
limit: int = fastapi.Query(
default=None,
description="Number of models to get. Will return Top 'n' models.",
)
):
"""
[BETA] This is a beta endpoint. It will change.
Use this to get the top 'n' models with the highest spend, ordered by spend.
"""
global prisma_client
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
sql_query = f"""SELECT * FROM "Last30dModelsBySpend" LIMIT {limit};"""
response = await prisma_client.db.query_raw(query=sql_query)
return response
@router.get(
"/daily_metrics",
summary="Get daily spend metrics",
tags=["budget & spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def view_daily_metrics(
start_date: Optional[str] = fastapi.Query(
default=None,
description="Time from which to start viewing key spend",
),
end_date: Optional[str] = fastapi.Query(
default=None,
description="Time till which to view key spend",
),
):
"""
[BETA] This is a beta endpoint. It might change without notice.
Please give feedback - https://github.com/BerriAI/litellm/issues
"""
try:
if os.getenv("CLICKHOUSE_HOST") is not None:
# getting spend logs from clickhouse
from litellm.integrations import clickhouse
return clickhouse.build_daily_metrics()
# create a response object
"""
{
"date": "2022-01-01",
"spend": 0.0,
"users": {},
"models": {},
}
"""
else:
raise Exception(
"Clickhouse: Clickhouse host not set. Required for viewing /daily/metrics"
)
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"/spend/logs Error({str(e)})"),
type="internal_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="/spend/logs Error" + str(e),
type="internal_error",
param=getattr(e, "param", "None"),
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
#### USER MANAGEMENT ####
@router.post(
"/user/new",
@ -4264,12 +4454,32 @@ async def user_update(data: UpdateUserRequest):
): # models default to [], spend defaults to 0, we should not reset these values
non_default_values[k] = v
## ADD USER, IF NEW ##
if data.user_id is not None and len(data.user_id) > 0:
non_default_values["user_id"] = data.user_id # type: ignore
await prisma_client.update_data(
user_id=data.user_id,
data=non_default_values,
table_name="user",
)
elif data.user_email is not None:
non_default_values["user_id"] = str(uuid.uuid4())
non_default_values["user_email"] = data.user_email
## user email is not unique according to the prisma schema -> future improvement
### for now: check if it exists in db, if not - insert it
existing_user_row = await prisma_client.get_data(
key_val={"user_email": data.user_email},
table_name="user",
query_type="find_all",
)
if existing_user_row is None or (
isinstance(existing_user_row, list) and len(existing_user_row) == 0
):
await prisma_client.insert_data(
data=non_default_values, table_name="user"
)
return non_default_values
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
@ -4472,6 +4682,40 @@ async def unblock_user(data: BlockUsers):
return {"blocked_users": litellm.blocked_user_list}
@router.get(
"/user/get_users",
tags=["user management"],
dependencies=[Depends(user_api_key_auth)],
)
async def get_users(
role: str = fastapi.Query(
default=None,
description="Either 'proxy_admin', 'proxy_viewer', 'app_owner', 'app_user'",
)
):
"""
[BETA] This could change without notice. Give feedback - https://github.com/BerriAI/litellm/issues
Get all users who are a specific `user_role`.
Used by the UI to populate the user lists.
Currently - admin-only endpoint.
"""
global prisma_client
if prisma_client is None:
raise HTTPException(
status_code=500,
detail={"error": f"No db connected. prisma client={prisma_client}"},
)
all_users = await prisma_client.get_data(
table_name="user", query_type="find_all", key_val={"user_role": role}
)
return all_users
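# Example request (sketch; role values per the Query description above):
#   curl -X GET "http://0.0.0.0:4000/user/get_users?role=proxy_admin" \
#        -H "Authorization: Bearer sk-1234"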
#### TEAM MANAGEMENT ####
@ -4621,9 +4865,9 @@ async def update_team(
):
"""
[BETA]
[DEPRECATED] - use the `/team/member_add` and `/team/member_remove` endpoints instead
[RECOMMENDED] - use `/team/member_add` to add new team members instead
You can now add / delete users from a team via /team/update
You can now update team budget / rate limits via /team/update
```
curl --location 'http://0.0.0.0:8000/team/update' \
@ -5620,7 +5864,7 @@ def get_image():
@app.get("/sso/callback", tags=["experimental"])
async def auth_callback(request: Request):
"""Verify login"""
global general_settings
global general_settings, ui_access_mode
microsoft_client_id = os.getenv("MICROSOFT_CLIENT_ID", None)
google_client_id = os.getenv("GOOGLE_CLIENT_ID", None)
generic_client_id = os.getenv("GENERIC_CLIENT_ID", None)
@ -5811,6 +6055,7 @@ async def auth_callback(request: Request):
"user_email": user_email,
}
try:
user_role = None
if prisma_client is not None:
user_info = await prisma_client.get_data(user_id=user_id, table_name="user")
verbose_proxy_logger.debug(
@ -5822,6 +6067,7 @@ async def auth_callback(request: Request):
"user_id": getattr(user_info, "user_id", user_id),
"user_email": getattr(user_info, "user_id", user_email),
}
user_role = getattr(user_info, "user_role", None)
elif litellm.default_user_params is not None and isinstance(
litellm.default_user_params, dict
):
@ -5844,13 +6090,27 @@ async def auth_callback(request: Request):
key = response["token"] # type: ignore
user_id = response["user_id"] # type: ignore
litellm_dashboard_ui = "/ui/"
user_role = "app_owner"
user_role = user_role or "app_owner"
if (
os.getenv("PROXY_ADMIN_ID", None) is not None
and os.environ["PROXY_ADMIN_ID"] == user_id
):
# checks if user is admin
user_role = "app_admin"
verbose_proxy_logger.debug(
f"user_role: {user_role}; ui_access_mode: {ui_access_mode}"
)
## CHECK IF ROLE ALLOWED TO USE PROXY ##
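# note: the substring check below matches both "app_admin" and "proxy_admin" roles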
if ui_access_mode == "admin_only" and "admin" not in user_role:
verbose_proxy_logger.debug("EXCEPTION RAISED")
raise HTTPException(
status_code=401,
detail={
"error": f"User not allowed to access proxy. User role={user_role}, proxy mode={ui_access_mode}"
},
)
import jwt
jwt_token = jwt.encode(

View file

@ -489,18 +489,20 @@ class PrismaClient:
)
async def check_view_exists(self):
"""
Checks if the LiteLLM_VerificationTokenView exists in the user's db.
Checks if the LiteLLM_VerificationTokenView and MonthlyGlobalSpend views exist in the user's db.
This is used for getting the token + team data in user_api_key_auth
LiteLLM_VerificationTokenView: This view is used for getting the token + team data in user_api_key_auth
MonthlyGlobalSpend: This view is used for the admin view to see global spend for this month
If the view doesn't exist, one will be created.
"""
try:
# Try to select one row from the view
await self.db.execute_raw(
await self.db.query_raw(
"""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1"""
)
return "LiteLLM_VerificationTokenView Exists!"
print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e:
# If an error occurs, the view does not exist, so create it
value = await self.health_check()
@ -518,7 +520,29 @@ class PrismaClient:
"""
)
return "LiteLLM_VerificationTokenView Created!"
print("LiteLLM_VerificationTokenView Created!") # noqa
try:
await self.db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT
DATE("startTime") AS date,
SUM("spend") AS spend
FROM
"LiteLLM_SpendLogs"
WHERE
"startTime" >= (CURRENT_DATE - INTERVAL '30 days')
GROUP BY
DATE("startTime");
"""
await self.db.execute_raw(query=sql_query)
print("MonthlyGlobalSpend Created!") # noqa
return
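# Example (sketch): with the view in place, a per-day spend query is simply
#   SELECT date, spend FROM "MonthlyGlobalSpend" ORDER BY date;
# (column names per the CREATE VIEW statement above)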
@backoff.on_exception(
backoff.expo,

View file

@ -1,253 +1,254 @@
import sys
import os
import io, asyncio
## @pytest.mark.skip(reason="AWS Suspended Account")
# import sys
# import os
# import io, asyncio
# import logging
# logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, os.path.abspath("../.."))
# # import logging
# # logging.basicConfig(level=logging.DEBUG)
# sys.path.insert(0, os.path.abspath("../.."))
from litellm import completion
import litellm
# from litellm import completion
# import litellm
litellm.num_retries = 3
# litellm.num_retries = 3
import time, random
import pytest
# import time, random
# import pytest
def test_s3_logging():
# all s3 requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel
# on circle ci - we only test litellm.acompletion()
try:
# redirect stdout to log_file
litellm.cache = litellm.Cache(
type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
)
# def test_s3_logging():
# # all s3 requests need to be in one test function
# # since we are modifying stdout, and pytests runs tests in parallel
# # on circle ci - we only test litellm.acompletion()
# try:
# # redirect stdout to log_file
# litellm.cache = litellm.Cache(
# type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
# )
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
"s3_bucket_name": "litellm-logs",
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
}
litellm.set_verbose = True
# litellm.success_callback = ["s3"]
# litellm.s3_callback_params = {
# "s3_bucket_name": "litellm-logs",
# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
# }
# litellm.set_verbose = True
print("Testing async s3 logging")
# print("Testing async s3 logging")
expected_keys = []
# expected_keys = []
import time
# import time
curr_time = str(time.time())
# curr_time = str(time.time())
async def _test():
return await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10,
temperature=0.7,
user="ishaan-2",
)
# async def _test():
# return await litellm.acompletion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
# max_tokens=10,
# temperature=0.7,
# user="ishaan-2",
# )
response = asyncio.run(_test())
print(f"response: {response}")
expected_keys.append(response.id)
# response = asyncio.run(_test())
# print(f"response: {response}")
# expected_keys.append(response.id)
async def _test():
return await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10,
temperature=0.7,
user="ishaan-2",
)
# async def _test():
# return await litellm.acompletion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
# max_tokens=10,
# temperature=0.7,
# user="ishaan-2",
# )
response = asyncio.run(_test())
expected_keys.append(response.id)
print(f"response: {response}")
time.sleep(5) # wait 5s for logs to land
# response = asyncio.run(_test())
# expected_keys.append(response.id)
# print(f"response: {response}")
# time.sleep(5) # wait 5s for logs to land
import boto3
# import boto3
s3 = boto3.client("s3")
bucket_name = "litellm-logs"
# List objects in the bucket
response = s3.list_objects(Bucket=bucket_name)
# s3 = boto3.client("s3")
# bucket_name = "litellm-logs"
# # List objects in the bucket
# response = s3.list_objects(Bucket=bucket_name)
# Sort the objects based on the LastModified timestamp
objects = sorted(
response["Contents"], key=lambda x: x["LastModified"], reverse=True
)
# Get the keys of the most recent objects
most_recent_keys = [obj["Key"] for obj in objects]
print(most_recent_keys)
# for each key, get the part before "-" as the key. Do it safely
cleaned_keys = []
for key in most_recent_keys:
split_key = key.split("_")
if len(split_key) < 2:
continue
cleaned_keys.append(split_key[1])
print("\n most recent keys", most_recent_keys)
print("\n cleaned keys", cleaned_keys)
print("\n Expected keys: ", expected_keys)
matches = 0
for key in expected_keys:
key += ".json"
assert key in cleaned_keys
# # Sort the objects based on the LastModified timestamp
# objects = sorted(
# response["Contents"], key=lambda x: x["LastModified"], reverse=True
# )
# # Get the keys of the most recent objects
# most_recent_keys = [obj["Key"] for obj in objects]
# print(most_recent_keys)
# # for each key, get the part before "-" as the key. Do it safely
# cleaned_keys = []
# for key in most_recent_keys:
# split_key = key.split("_")
# if len(split_key) < 2:
# continue
# cleaned_keys.append(split_key[1])
# print("\n most recent keys", most_recent_keys)
# print("\n cleaned keys", cleaned_keys)
# print("\n Expected keys: ", expected_keys)
# matches = 0
# for key in expected_keys:
# key += ".json"
# assert key in cleaned_keys
if key in cleaned_keys:
matches += 1
# remove the match key
cleaned_keys.remove(key)
# this asserts we log, the first request + the 2nd cached request
print("we had two matches ! passed ", matches)
assert matches == 2
try:
# cleanup s3 bucket in test
for key in most_recent_keys:
s3.delete_object(Bucket=bucket_name, Key=key)
except:
# don't let cleanup fail a test
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
finally:
# post, close log file and verify
# Reset stdout to the original value
print("Passed! Testing async s3 logging")
# if key in cleaned_keys:
# matches += 1
# # remove the match key
# cleaned_keys.remove(key)
# # this asserts we log, the first request + the 2nd cached request
# print("we had two matches ! passed ", matches)
# assert matches == 2
# try:
# # cleanup s3 bucket in test
# for key in most_recent_keys:
# s3.delete_object(Bucket=bucket_name, Key=key)
# except:
# # don't let cleanup fail a test
# pass
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
# finally:
# # post, close log file and verify
# # Reset stdout to the original value
# print("Passed! Testing async s3 logging")
# test_s3_logging()
# # test_s3_logging()
def test_s3_logging_async():
# this tests time added to make s3 logging calls, vs just acompletion calls
try:
litellm.set_verbose = True
# Make 5 calls with an empty success_callback
litellm.success_callback = []
start_time_empty_callback = asyncio.run(make_async_calls())
print("done with no callback test")
# def test_s3_logging_async():
# # this tests time added to make s3 logging calls, vs just acompletion calls
# try:
# litellm.set_verbose = True
# # Make 5 calls with an empty success_callback
# litellm.success_callback = []
# start_time_empty_callback = asyncio.run(make_async_calls())
# print("done with no callback test")
print("starting s3 logging load test")
# Make 5 calls with success_callback set to "langfuse"
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
"s3_bucket_name": "litellm-logs",
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
}
start_time_s3 = asyncio.run(make_async_calls())
print("done with s3 test")
# print("starting s3 logging load test")
# # Make 5 calls with success_callback set to "langfuse"
# litellm.success_callback = ["s3"]
# litellm.s3_callback_params = {
# "s3_bucket_name": "litellm-logs",
# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
# }
# start_time_s3 = asyncio.run(make_async_calls())
# print("done with s3 test")
# Compare the time for both scenarios
print(f"Time taken with success_callback='s3': {start_time_s3}")
print(f"Time taken with empty success_callback: {start_time_empty_callback}")
# # Compare the time for both scenarios
# print(f"Time taken with success_callback='s3': {start_time_s3}")
# print(f"Time taken with empty success_callback: {start_time_empty_callback}")
# assert the diff is not more than 1 second
assert abs(start_time_s3 - start_time_empty_callback) < 1
# # assert the diff is not more than 1 second
# assert abs(start_time_s3 - start_time_empty_callback) < 1
except litellm.Timeout as e:
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
# except litellm.Timeout as e:
# pass
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
async def make_async_calls():
tasks = []
for _ in range(5):
task = asyncio.create_task(
litellm.acompletion(
model="azure/chatgpt-v-2",
messages=[{"role": "user", "content": "This is a test"}],
max_tokens=5,
temperature=0.7,
timeout=5,
user="langfuse_latency_test_user",
mock_response="It's simple to use and easy to get started",
)
)
tasks.append(task)
# async def make_async_calls():
# tasks = []
# for _ in range(5):
# task = asyncio.create_task(
# litellm.acompletion(
# model="azure/chatgpt-v-2",
# messages=[{"role": "user", "content": "This is a test"}],
# max_tokens=5,
# temperature=0.7,
# timeout=5,
# user="langfuse_latency_test_user",
# mock_response="It's simple to use and easy to get started",
# )
# )
# tasks.append(task)
# Measure the start time before running the tasks
start_time = asyncio.get_event_loop().time()
# # Measure the start time before running the tasks
# start_time = asyncio.get_event_loop().time()
# Wait for all tasks to complete
responses = await asyncio.gather(*tasks)
# # Wait for all tasks to complete
# responses = await asyncio.gather(*tasks)
# Print the responses when tasks return
for idx, response in enumerate(responses):
print(f"Response from Task {idx + 1}: {response}")
# # Print the responses when tasks return
# for idx, response in enumerate(responses):
# print(f"Response from Task {idx + 1}: {response}")
# Calculate the total time taken
total_time = asyncio.get_event_loop().time() - start_time
# # Calculate the total time taken
# total_time = asyncio.get_event_loop().time() - start_time
return total_time
# return total_time
def test_s3_logging_r2():
# all s3 requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel
# on circle ci - we only test litellm.acompletion()
try:
# redirect stdout to log_file
# litellm.cache = litellm.Cache(
# type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
# )
litellm.set_verbose = True
from litellm._logging import verbose_logger
import logging
# def test_s3_logging_r2():
# # all s3 requests need to be in one test function
# # since we are modifying stdout, and pytests runs tests in parallel
# # on circle ci - we only test litellm.acompletion()
# try:
# # redirect stdout to log_file
# # litellm.cache = litellm.Cache(
# # type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
# # )
# litellm.set_verbose = True
# from litellm._logging import verbose_logger
# import logging
verbose_logger.setLevel(level=logging.DEBUG)
# verbose_logger.setLevel(level=logging.DEBUG)
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
"s3_bucket_name": "litellm-r2-bucket",
"s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
"s3_endpoint_url": "os.environ/R2_S3_URL",
"s3_region_name": "os.environ/R2_S3_REGION_NAME",
}
print("Testing async s3 logging")
# litellm.success_callback = ["s3"]
# litellm.s3_callback_params = {
# "s3_bucket_name": "litellm-r2-bucket",
# "s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
# "s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
# "s3_endpoint_url": "os.environ/R2_S3_URL",
# "s3_region_name": "os.environ/R2_S3_REGION_NAME",
# }
# print("Testing async s3 logging")
expected_keys = []
# expected_keys = []
import time
# import time
curr_time = str(time.time())
# curr_time = str(time.time())
async def _test():
return await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10,
temperature=0.7,
user="ishaan-2",
)
# async def _test():
# return await litellm.acompletion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
# max_tokens=10,
# temperature=0.7,
# user="ishaan-2",
# )
response = asyncio.run(_test())
print(f"response: {response}")
expected_keys.append(response.id)
# response = asyncio.run(_test())
# print(f"response: {response}")
# expected_keys.append(response.id)
import boto3
# import boto3
s3 = boto3.client(
"s3",
endpoint_url=os.getenv("R2_S3_URL"),
region_name=os.getenv("R2_S3_REGION_NAME"),
aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
)
# s3 = boto3.client(
# "s3",
# endpoint_url=os.getenv("R2_S3_URL"),
# region_name=os.getenv("R2_S3_REGION_NAME"),
# aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
# aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
# )
bucket_name = "litellm-r2-bucket"
# List objects in the bucket
response = s3.list_objects(Bucket=bucket_name)
# bucket_name = "litellm-r2-bucket"
# # List objects in the bucket
# response = s3.list_objects(Bucket=bucket_name)
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
finally:
# post, close log file and verify
# Reset stdout to the original value
print("Passed! Testing async s3 logging")
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
# finally:
# # post, close log file and verify
# # Reset stdout to the original value
# print("Passed! Testing async s3 logging")

View file

@ -110,6 +110,7 @@ def test_vertex_ai():
"code-bison@001",
"text-bison@001",
"gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision",
]:
# our account does not have access to this model
@ -129,6 +130,8 @@ def test_vertex_ai():
f"response.choices[0].finish_reason: {response.choices[0].finish_reason}"
)
assert response.choices[0].finish_reason in litellm._openai_finish_reasons
except litellm.RateLimitError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
@ -160,6 +163,7 @@ def test_vertex_ai_stream():
"code-bison@001",
"text-bison@001",
"gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision",
]:
# our account does not have access to this model
@ -181,6 +185,8 @@ def test_vertex_ai_stream():
assert type(content) == str
# pass
assert len(completed_str) > 4
except litellm.RateLimitError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
@ -211,6 +217,7 @@ async def test_async_vertexai_response():
"code-bison@001",
"text-bison@001",
"gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision",
]:
# our account does not have access to this model
@ -255,6 +262,7 @@ async def test_async_vertexai_streaming_response():
"code-bison@001",
"text-bison@001",
"gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision",
]:
# our account does not have access to this model

View file

@ -193,16 +193,26 @@ async def test_hf_completion_tgi():
# Add any assertions here to check the response
print(response)
except litellm.APIError as e:
print("got an api error")
pass
except litellm.Timeout as e:
print("got a timeout error")
pass
except litellm.RateLimitError as e:
# this will catch the model is overloaded error
print("got a rate limit error")
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
if "Model is overloaded" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
# test_get_cloudflare_response_streaming()
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_completion_sagemaker():
# litellm.set_verbose=True

View file

@ -1,257 +1,259 @@
import sys, os
import traceback
from dotenv import load_dotenv
# @pytest.mark.skip(reason="AWS Suspended Account")
# import sys, os
# import traceback
# from dotenv import load_dotenv
load_dotenv()
import os, io
# load_dotenv()
# import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion, completion_cost, Timeout
from litellm import RateLimitError
# sys.path.insert(
# 0, os.path.abspath("../..")
# ) # Adds the parent directory to the system path
# import pytest
# import litellm
# from litellm import embedding, completion, completion_cost, Timeout
# from litellm import RateLimitError
# litellm.num_retries = 3
litellm.cache = None
litellm.success_callback = []
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]
# # litellm.num_retries = 3
# litellm.cache = None
# litellm.success_callback = []
# user_message = "Write a short poem about the sky"
# messages = [{"content": user_message, "role": "user"}]
@pytest.fixture(autouse=True)
def reset_callbacks():
print("\npytest fixture - resetting callbacks")
litellm.success_callback = []
litellm._async_success_callback = []
litellm.failure_callback = []
litellm.callbacks = []
# @pytest.fixture(autouse=True)
# def reset_callbacks():
# print("\npytest fixture - resetting callbacks")
# litellm.success_callback = []
# litellm._async_success_callback = []
# litellm.failure_callback = []
# litellm.callbacks = []
def test_completion_bedrock_claude_completion_auth():
print("calling bedrock claude completion params auth")
import os
# def test_completion_bedrock_claude_completion_auth():
# print("calling bedrock claude completion params auth")
# import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None)
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
try:
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_region_name=aws_region_name,
)
# Add any assertions here to check the response
print(response)
# try:
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_region_name=aws_region_name,
# )
# # Add any assertions here to check the response
# print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_completion_auth()
# # test_completion_bedrock_claude_completion_auth()
def test_completion_bedrock_claude_2_1_completion_auth():
print("calling bedrock claude 2.1 completion params auth")
import os
# def test_completion_bedrock_claude_2_1_completion_auth():
# print("calling bedrock claude 2.1 completion params auth")
# import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None)
try:
response = completion(
model="bedrock/anthropic.claude-v2:1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_region_name=aws_region_name,
)
# Add any assertions here to check the response
print(response)
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
# try:
# response = completion(
# model="bedrock/anthropic.claude-v2:1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_region_name=aws_region_name,
# )
# # Add any assertions here to check the response
# print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_2_1_completion_auth()
# # test_completion_bedrock_claude_2_1_completion_auth()
def test_completion_bedrock_claude_external_client_auth():
print("\ncalling bedrock claude external client auth")
import os
# def test_completion_bedrock_claude_external_client_auth():
# print("\ncalling bedrock claude external client auth")
# import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None)
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
try:
import boto3
# try:
# import boto3
litellm.set_verbose = True
# litellm.set_verbose = True
bedrock = boto3.client(
service_name="bedrock-runtime",
region_name=aws_region_name,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
)
# bedrock = boto3.client(
# service_name="bedrock-runtime",
# region_name=aws_region_name,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
# )
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_bedrock_client=bedrock,
)
# Add any assertions here to check the response
print(response)
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_bedrock_client=bedrock,
# )
# # Add any assertions here to check the response
# print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_external_client_auth()
# # test_completion_bedrock_claude_external_client_auth()
def test_completion_bedrock_claude_sts_client_auth():
print("\ncalling bedrock claude external client auth")
import os
# @pytest.mark.skip(reason="Expired token, need to renew")
# def test_completion_bedrock_claude_sts_client_auth():
# print("\ncalling bedrock claude external client auth")
# import os
aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
# aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
try:
import boto3
# try:
# import boto3
litellm.set_verbose = True
# litellm.set_verbose = True
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_region_name=aws_region_name,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_region_name=aws_region_name,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
response = embedding(
model="cohere.embed-multilingual-v3",
input=["hello world"],
aws_region_name="us-east-1",
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
# response = embedding(
# model="cohere.embed-multilingual-v3",
# input=["hello world"],
# aws_region_name="us-east-1",
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
response = completion(
model="gpt-3.5-turbo",
messages=messages,
aws_region_name="us-east-1",
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
# Add any assertions here to check the response
print(response)
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# response = completion(
# model="gpt-3.5-turbo",
# messages=messages,
# aws_region_name="us-east-1",
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
# # Add any assertions here to check the response
# print(response)
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
test_completion_bedrock_claude_sts_client_auth()
# # test_completion_bedrock_claude_sts_client_auth()
def test_provisioned_throughput():
try:
litellm.set_verbose = True
import botocore, json, io
import botocore.session
from botocore.stub import Stubber
# def test_provisioned_throughput():
# try:
# litellm.set_verbose = True
# import botocore, json, io
# import botocore.session
# from botocore.stub import Stubber
bedrock_client = botocore.session.get_session().create_client(
"bedrock-runtime", region_name="us-east-1"
)
# bedrock_client = botocore.session.get_session().create_client(
# "bedrock-runtime", region_name="us-east-1"
# )
expected_params = {
"accept": "application/json",
"body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
'"max_tokens_to_sample": 256}',
"contentType": "application/json",
"modelId": "provisioned-model-arn",
}
response_from_bedrock = {
"body": io.StringIO(
json.dumps(
{
"completion": " Here is a short poem about the sky:",
"stop_reason": "max_tokens",
"stop": None,
}
)
),
"contentType": "contentType",
"ResponseMetadata": {"HTTPStatusCode": 200},
}
# expected_params = {
# "accept": "application/json",
# "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
# '"max_tokens_to_sample": 256}',
# "contentType": "application/json",
# "modelId": "provisioned-model-arn",
# }
# response_from_bedrock = {
# "body": io.StringIO(
# json.dumps(
# {
# "completion": " Here is a short poem about the sky:",
# "stop_reason": "max_tokens",
# "stop": None,
# }
# )
# ),
# "contentType": "contentType",
# "ResponseMetadata": {"HTTPStatusCode": 200},
# }
with Stubber(bedrock_client) as stubber:
stubber.add_response(
"invoke_model",
service_response=response_from_bedrock,
expected_params=expected_params,
)
response = litellm.completion(
model="bedrock/anthropic.claude-instant-v1",
model_id="provisioned-model-arn",
messages=[{"content": "Hello, how are you?", "role": "user"}],
aws_bedrock_client=bedrock_client,
)
print("response stubbed", response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# with Stubber(bedrock_client) as stubber:
# stubber.add_response(
# "invoke_model",
# service_response=response_from_bedrock,
# expected_params=expected_params,
# )
# response = litellm.completion(
# model="bedrock/anthropic.claude-instant-v1",
# model_id="provisioned-model-arn",
# messages=[{"content": "Hello, how are you?", "role": "user"}],
# aws_bedrock_client=bedrock_client,
# )
# print("response stubbed", response)
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_provisioned_throughput()
# # test_provisioned_throughput()

View file

@ -546,6 +546,7 @@ def test_redis_cache_acompletion_stream():
# test_redis_cache_acompletion_stream()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_redis_cache_acompletion_stream_bedrock():
import asyncio
@ -571,7 +572,7 @@ def test_redis_cache_acompletion_stream_bedrock():
async def call1():
nonlocal response_1_content
response1 = await litellm.acompletion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=messages,
max_tokens=40,
temperature=1,
@ -589,7 +590,7 @@ def test_redis_cache_acompletion_stream_bedrock():
async def call2():
nonlocal response_2_content
response2 = await litellm.acompletion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=messages,
max_tokens=40,
temperature=1,
@ -615,6 +616,7 @@ def test_redis_cache_acompletion_stream_bedrock():
raise e
@pytest.mark.skip(reason="AWS Suspended Account")
def test_s3_cache_acompletion_stream_azure():
import asyncio
@ -697,6 +699,7 @@ def test_s3_cache_acompletion_stream_azure():
@pytest.mark.asyncio
@pytest.mark.skip(reason="AWS Suspended Account")
async def test_s3_cache_acompletion_azure():
import asyncio
import logging

View file

@ -1404,6 +1404,7 @@ def test_customprompt_together_ai():
# test_customprompt_together_ai()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker():
try:
litellm.set_verbose = True
@ -1429,6 +1430,7 @@ def test_completion_sagemaker():
# test_completion_sagemaker()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker_stream():
try:
litellm.set_verbose = False
@ -1459,6 +1461,7 @@ def test_completion_sagemaker_stream():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_chat_sagemaker():
try:
messages = [{"role": "user", "content": "Hey, how's it going?"}]
@ -1483,6 +1486,7 @@ def test_completion_chat_sagemaker():
# test_completion_chat_sagemaker()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_chat_sagemaker_mistral():
try:
messages = [{"role": "user", "content": "Hey, how's it going?"}]
@ -1501,6 +1505,7 @@ def test_completion_chat_sagemaker_mistral():
# test_completion_chat_sagemaker_mistral()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan_null_response():
try:
response = completion(
@ -1526,6 +1531,7 @@ def test_completion_bedrock_titan_null_response():
pytest.fail(f"An error occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan():
try:
response = completion(
@ -1547,6 +1553,7 @@ def test_completion_bedrock_titan():
# test_completion_bedrock_titan()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_claude():
print("calling claude")
try:
@ -1568,6 +1575,7 @@ def test_completion_bedrock_claude():
# test_completion_bedrock_claude()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_cohere():
print("calling bedrock cohere")
litellm.set_verbose = True
@ -1954,12 +1962,15 @@ def test_completion_gemini():
messages = [{"role": "user", "content": "Hey, how's it going?"}]
try:
response = completion(model=model_name, messages=messages)
# Add any assertions here to check the response
# Add any assertions here to check the response
print(response)
except litellm.APIError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
if "InternalServerError" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
# test_completion_gemini()
@ -1974,8 +1985,13 @@ async def test_acompletion_gemini():
response = await litellm.acompletion(model=model_name, messages=messages)
# Add any assertions here to check the response
print(f"response: {response}")
except litellm.APIError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
if "InternalServerError" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
# Palm tests

View file

@ -171,6 +171,7 @@ def test_cost_openai_image_gen():
assert cost == 0.019922944
@pytest.mark.skip(reason="AWS Suspended Account")
def test_cost_bedrock_pricing():
"""
- get pricing specific to region for a model
@ -226,6 +227,7 @@ def test_cost_bedrock_pricing():
assert cost == predicted_cost
@pytest.mark.skip(reason="AWS disabled our access")
def test_cost_bedrock_pricing_actual_calls():
litellm.set_verbose = True
model = "anthropic.claude-instant-v1"

View file

@ -80,16 +80,6 @@ model_list:
description: this is a test openai model
id: 9b1ef341-322c-410a-8992-903987fef439
model_name: test_openai_models
- litellm_params:
model: bedrock/amazon.titan-embed-text-v1
model_info:
mode: embedding
model_name: amazon-embeddings
- litellm_params:
model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
model_info:
mode: embedding
model_name: GPT-J 6B - Sagemaker Text Embedding (Internal)
- litellm_params:
model: dall-e-3
model_info:

View file

@ -478,17 +478,18 @@ async def test_async_chat_azure_stream():
## Test Bedrock + sync
@pytest.mark.skip(reason="AWS Suspended Account")
def test_chat_bedrock_stream():
try:
customHandler = CompletionCustomHandler()
litellm.callbacks = [customHandler]
response = litellm.completion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
)
# test streaming
response = litellm.completion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
stream=True,
)
@ -497,7 +498,7 @@ def test_chat_bedrock_stream():
# test failure callback
try:
response = litellm.completion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
aws_region_name="my-bad-region",
stream=True,
@ -518,18 +519,19 @@ def test_chat_bedrock_stream():
## Test Bedrock + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_chat_bedrock_stream():
try:
customHandler = CompletionCustomHandler()
litellm.callbacks = [customHandler]
response = await litellm.acompletion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
)
# test streaming
response = await litellm.acompletion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
stream=True,
)
@ -540,7 +542,7 @@ async def test_async_chat_bedrock_stream():
## test failure callback
try:
response = await litellm.acompletion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
aws_region_name="my-bad-key",
stream=True,
@ -561,6 +563,7 @@ async def test_async_chat_bedrock_stream():
## Test Sagemaker + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_chat_sagemaker_stream():
try:
@ -793,6 +796,7 @@ async def test_async_embedding_azure():
## Test Bedrock + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_embedding_bedrock():
try:

View file

@ -388,6 +388,7 @@ async def test_async_custom_handler_embedding_optional_param():
# asyncio.run(test_async_custom_handler_embedding_optional_param())
@pytest.mark.skip(reason="AWS Account suspended. Pending their approval")
@pytest.mark.asyncio
async def test_async_custom_handler_embedding_optional_param_bedrock():
"""

View file

@ -67,6 +67,7 @@ def verify_log_file(log_file_path):
assert success_count == 3 # Expect 3 success logs from dynamoDB
@pytest.mark.skip(reason="AWS Suspended Account")
def test_dynamo_logging():
# all dynamodb requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel

View file

@ -256,6 +256,7 @@ async def test_vertexai_aembedding():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding_titan():
try:
# this tests if we support str input for bedrock embedding
@ -301,6 +302,7 @@ def test_bedrock_embedding_titan():
# test_bedrock_embedding_titan()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding_cohere():
try:
litellm.set_verbose = False
@ -422,6 +424,7 @@ def test_aembedding_azure():
# test_aembedding_azure()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_sagemaker_embeddings():
try:
response = litellm.embedding(
@ -438,6 +441,7 @@ def test_sagemaker_embeddings():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_aembeddings():
try:

View file

@ -42,6 +42,7 @@ exception_models = [
# Test 1: Context Window Errors
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.parametrize("model", exception_models)
def test_context_window(model):
print("Testing context window error")
@ -120,9 +121,9 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
os.environ["AI21_API_KEY"] = "bad-key"
elif "togethercomputer" in model:
temporary_key = os.environ["TOGETHERAI_API_KEY"]
os.environ[
"TOGETHERAI_API_KEY"
] = "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
os.environ["TOGETHERAI_API_KEY"] = (
"84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
)
elif model in litellm.openrouter_models:
temporary_key = os.environ["OPENROUTER_API_KEY"]
os.environ["OPENROUTER_API_KEY"] = "bad-key"

View file

@ -87,6 +87,7 @@ async def test_azure_img_gen_health_check():
# asyncio.run(test_azure_img_gen_health_check())
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_embedding_health_check():
response = await litellm.ahealth_check(

View file

@ -121,6 +121,7 @@ async def test_async_image_generation_azure():
pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_image_generation_bedrock():
try:
litellm.set_verbose = True
@ -141,6 +142,7 @@ def test_image_generation_bedrock():
pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_aimage_generation_bedrock_with_optional_params():
try:

File diff suppressed because it is too large

View file

@ -80,6 +80,14 @@ request_data = {
@pytest.fixture
def prisma_client():
from litellm.proxy.proxy_cli import append_query_params
### add connection pool + pool timeout args
params = {"connection_limit": 100, "pool_timeout": 60}
database_url = os.getenv("DATABASE_URL")
modified_url = append_query_params(database_url, params)
os.environ["DATABASE_URL"] = modified_url
# Assuming DBClient is a class that needs to be instantiated
prisma_client = PrismaClient(
database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
@ -1633,3 +1641,99 @@ async def test_key_with_no_permissions(prisma_client):
except Exception as e:
print("Got Exception", e)
print(e.message)
async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
from litellm import ModelResponse, Choices, Message, Usage
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)
import uuid
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
resp = ModelResponse(
id=request_id,
choices=[
Choices(
finish_reason=None,
index=0,
message=Message(
content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
role="assistant",
),
)
],
model="gpt-35-turbo", # azure always has model written like this
usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
)
await track_cost_callback(
kwargs={
"call_type": "acompletion",
"model": "sagemaker-chatgpt-v-2",
"stream": True,
"complete_streaming_response": resp,
"litellm_params": {
"metadata": {
"user_api_key": hash_token(generated_key),
"user_api_key_user_id": user_id,
}
},
"response_cost": 0.00005,
},
completion_response=resp,
start_time=datetime.now(),
end_time=datetime.now(),
)
@pytest.mark.skip(reason="High traffic load test for spend tracking")
@pytest.mark.asyncio
async def test_proxy_load_test_db(prisma_client):
"""
Run 1500 req./s against track_cost_callback function
"""
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
from litellm._logging import verbose_proxy_logger
import logging, time
litellm.set_verbose = True
verbose_proxy_logger.setLevel(logging.DEBUG)
try:
start_time = time.time()
await litellm.proxy.proxy_server.prisma_client.connect()
request = GenerateKeyRequest(max_budget=0.00001)
key = await generate_key_fn(request)
print(key)
generated_key = key.key
user_id = key.user_id
bearer_token = "Bearer " + generated_key
request = Request(scope={"type": "http"})
request._url = URL(url="/chat/completions")
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print("result from user auth with new key", result)
# update spend using track_cost callback, make 2nd request, it should fail
n = 5000
tasks = [
track_cost_callback_helper_fn(generated_key=generated_key, user_id=user_id)
for _ in range(n)
]
completions = await asyncio.gather(*tasks)
await asyncio.sleep(120)
try:
# call spend logs
spend_logs = await view_spend_logs(api_key=generated_key)
print(f"len responses: {len(spend_logs)}")
assert len(spend_logs) == n
print(n, time.time() - start_time, len(spend_logs))
except:
print(n, time.time() - start_time, 0)
raise Exception(f"it worked! key={key.key}")
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")

View file

@ -12,6 +12,7 @@ import litellm
from litellm import completion
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker():
litellm.set_verbose = True
litellm.drop_params = True

View file

@ -473,6 +473,7 @@ def aleph_alpha_test_completion():
# Sagemaker
@pytest.mark.skip(reason="AWS Suspended Account")
def sagemaker_test_completion():
litellm.SagemakerConfig(max_new_tokens=10)
# litellm.set_verbose=True
@ -514,6 +515,7 @@ def sagemaker_test_completion():
# Bedrock
@pytest.mark.skip(reason="AWS Suspended Account")
def bedrock_test_completion():
litellm.AmazonCohereConfig(max_tokens=10)
# litellm.set_verbose=True

View file

@ -125,6 +125,7 @@ def test_embedding(client_no_auth):
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding(client_no_auth):
global headers
from litellm.proxy.proxy_server import user_custom_auth
@ -145,6 +146,7 @@ def test_bedrock_embedding(client_no_auth):
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_sagemaker_embedding(client_no_auth):
global headers
from litellm.proxy.proxy_server import user_custom_auth

View file

@ -61,6 +61,7 @@ def generate_random_word(length=4):
return "".join(random.choice(letters) for _ in range(length))
@pytest.mark.skip(reason="AWS Suspended Account")
def test_chat_completion(client_no_auth):
global headers
try:

View file

@ -166,14 +166,6 @@ def test_call_one_endpoint():
"tpm": 240000,
"rpm": 1800,
},
{
"model_name": "claude-v1",
"litellm_params": {
"model": "bedrock/anthropic.claude-instant-v1",
},
"tpm": 100000,
"rpm": 10000,
},
{
"model_name": "text-embedding-ada-002",
"litellm_params": {
@ -202,15 +194,6 @@ def test_call_one_endpoint():
)
print("\n response", response)
async def call_bedrock_claude():
response = await router.acompletion(
model="bedrock/anthropic.claude-instant-v1",
messages=[{"role": "user", "content": "hello this request will pass"}],
specific_deployment=True,
)
print("\n response", response)
async def call_azure_embedding():
response = await router.aembedding(
model="azure/azure-embedding-model",
@ -221,7 +204,6 @@ def test_call_one_endpoint():
print("\n response", response)
asyncio.run(call_azure_completion())
asyncio.run(call_bedrock_claude())
asyncio.run(call_azure_embedding())
os.environ["AZURE_API_BASE"] = old_api_base
@ -593,6 +575,7 @@ def test_azure_embedding_on_router():
# test_azure_embedding_on_router()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_on_router():
litellm.set_verbose = True
print("\n Testing bedrock on router\n")

View file

@ -87,6 +87,7 @@ def test_router_timeouts():
print("********** TOKENS USED SO FAR = ", total_tokens_used)
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_router_timeouts_bedrock():
import openai

View file

@ -764,6 +764,7 @@ def test_completion_replicate_stream_bad_key():
# test_completion_replicate_stream_bad_key()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_claude_stream():
try:
litellm.set_verbose = False
@ -810,6 +811,7 @@ def test_completion_bedrock_claude_stream():
# test_completion_bedrock_claude_stream()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_ai21_stream():
try:
litellm.set_verbose = False
@ -911,6 +913,7 @@ def test_sagemaker_weird_response():
# test_sagemaker_weird_response()
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_streaming_async():
try:
@ -949,6 +952,7 @@ async def test_sagemaker_streaming_async():
# asyncio.run(test_sagemaker_streaming_async())
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker_stream():
try:
response = completion(
@ -1075,8 +1079,6 @@ async def test_hf_completion_tgi_stream():
if finished:
break
idx += 1
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except litellm.ServiceUnavailableError as e:
pass

View file

@ -317,3 +317,24 @@ def test_token_counter():
# test_token_counter()
def test_supports_function_calling():
try:
assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
assert (
litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
)
assert (
litellm.supports_function_calling(model="anthropic.claude-instant-v1")
== False
)
assert litellm.supports_function_calling(model="palm/chat-bison") == False
assert litellm.supports_function_calling(model="ollama/llama2") == False
assert litellm.supports_function_calling(model="claude-2") == False
except Exception as e:
pytest.fail(f"Error occurred: {e}")

View file

@ -205,18 +205,18 @@ def map_finish_reason(
class FunctionCall(OpenAIObject):
arguments: str
name: str
name: Optional[str] = None
class Function(OpenAIObject):
arguments: str
name: str
name: Optional[str] = None
class ChatCompletionDeltaToolCall(OpenAIObject):
id: str
id: Optional[str] = None
function: Function
type: str
type: Optional[str] = None
index: int
@ -275,13 +275,19 @@ class Delta(OpenAIObject):
super(Delta, self).__init__(**params)
self.content = content
self.role = role
self.function_call = function_call
if tool_calls is not None and isinstance(tool_calls, dict):
if function_call is not None and isinstance(function_call, dict):
self.function_call = FunctionCall(**function_call)
else:
self.function_call = function_call
if tool_calls is not None and isinstance(tool_calls, list):
self.tool_calls = []
for tool_call in tool_calls:
if tool_call.get("index", None) is None:
tool_call["index"] = 0
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
if isinstance(tool_call, dict):
if tool_call.get("index", None) is None:
tool_call["index"] = 0
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
elif isinstance(tool_call, ChatCompletionDeltaToolCall):
self.tool_calls.append(tool_call)
else:
self.tool_calls = tool_calls
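# Example (sketch): the coercion above lets a raw streaming chunk dict become typed objects:
#   d = Delta(tool_calls=[{"id": "call_1", "type": "function",
#                          "function": {"name": "get_weather", "arguments": ""}}])
#   d.tool_calls[0].index  # -> 0, filled in because the chunk omitted "index"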
@ -1636,7 +1642,7 @@ class Logging:
verbose_logger.debug(
"Async success callbacks: Got a complete streaming response"
)
self.model_call_details["complete_streaming_response"] = (
self.model_call_details["async_complete_streaming_response"] = (
complete_streaming_response
)
try:
@ -1684,28 +1690,31 @@ class Logging:
print_verbose("async success_callback: reaches cache for logging!")
kwargs = self.model_call_details
if self.stream:
if "complete_streaming_response" not in kwargs:
if "async_complete_streaming_response" not in kwargs:
print_verbose(
f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. Kwargs={kwargs}\n\n"
)
pass
else:
print_verbose(
"async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
"async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache"
)
result = kwargs["complete_streaming_response"]
result = kwargs["async_complete_streaming_response"]
# only add to cache once we have a complete streaming response
litellm.cache.add_cache(result, **kwargs)
if isinstance(callback, CustomLogger): # custom logger class
print_verbose(
f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}"
f"Running Async success callback: {callback}; self.stream: {self.stream}; async_complete_streaming_response: {self.model_call_details.get('async_complete_streaming_response', None)} result={result}"
)
if self.stream == True:
if "complete_streaming_response" in self.model_call_details:
if (
"async_complete_streaming_response"
in self.model_call_details
):
await callback.async_log_success_event(
kwargs=self.model_call_details,
response_obj=self.model_call_details[
"complete_streaming_response"
"async_complete_streaming_response"
],
start_time=start_time,
end_time=end_time,
@ -1726,14 +1735,18 @@ class Logging:
)
if callable(callback): # custom logger functions
print_verbose(
f"Making async function logging call - {self.model_call_details}"
f"Making async function logging call for {callback}, result={result} - {self.model_call_details}"
)
if self.stream:
if "complete_streaming_response" in self.model_call_details:
if (
"async_complete_streaming_response"
in self.model_call_details
):
await customLogger.async_log_event(
kwargs=self.model_call_details,
response_obj=self.model_call_details[
"complete_streaming_response"
"async_complete_streaming_response"
],
start_time=start_time,
end_time=end_time,
@ -1754,14 +1767,17 @@ class Logging:
if dynamoLogger is None:
dynamoLogger = DyanmoDBLogger()
if self.stream:
if "complete_streaming_response" in self.model_call_details:
if (
"async_complete_streaming_response"
in self.model_call_details
):
print_verbose(
"DynamoDB Logger: Got Stream Event - Completed Stream Response"
)
await dynamoLogger._async_log_event(
kwargs=self.model_call_details,
response_obj=self.model_call_details[
"complete_streaming_response"
"async_complete_streaming_response"
],
start_time=start_time,
end_time=end_time,
@ -3715,6 +3731,54 @@ def completion_cost(
raise e
def supports_function_calling(model: str):
"""
Check if the given model supports function calling and return a boolean value.
Parameters:
model (str): The model name to be checked.
Returns:
bool: True if the model supports function calling, False otherwise.
Raises:
Exception: If the given model is not found in model_prices_and_context_window.json.
"""
if model in litellm.model_cost:
model_info = litellm.model_cost[model]
if model_info.get("supports_function_calling", False):
return True
return False
else:
raise Exception(
f"Model not in model_prices_and_context_window.json. You passed model={model}."
)
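# Usage sketch (model names assume the cost-map entries added in this commit;
# a model missing from the map raises instead of returning False):
#
#   supports_function_calling(model="gpt-4-0613")          # -> True
#   supports_function_calling(model="gpt-3.5-turbo-0301")  # -> False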
def supports_parallel_function_calling(model: str):
"""
Check if the given model supports parallel function calling and return True if it does, False otherwise.
Parameters:
model (str): The model to check for support of parallel function calling.
Returns:
bool: True if the model supports parallel function calling, False otherwise.
Raises:
Exception: If the model is not found in the model_cost dictionary.
"""
if model in litellm.model_cost:
model_info = litellm.model_cost[model]
if model_info.get("supports_parallel_function_calling", False):
return True
return False
else:
raise Exception(
f"Model not in model_prices_and_context_window.json. You passed model={model}."
)
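# Usage sketch (per the supports_parallel_function_calling flags added to the
# cost map in this commit):
#
#   supports_parallel_function_calling(model="gpt-3.5-turbo-1106")  # -> True
#   supports_parallel_function_calling(model="gpt-4-0613")          # -> False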
####### HELPER FUNCTIONS ################
def register_model(model_cost: Union[str, dict]):
"""
@ -4043,6 +4107,7 @@ def get_optional_params(
and custom_llm_provider != "vertex_ai"
and custom_llm_provider != "anyscale"
and custom_llm_provider != "together_ai"
and custom_llm_provider != "mistral"
):
if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
# ollama actually supports json output
@ -4713,7 +4778,14 @@ def get_optional_params(
if max_tokens:
optional_params["max_tokens"] = max_tokens
elif custom_llm_provider == "mistral":
supported_params = ["temperature", "top_p", "stream", "max_tokens"]
supported_params = [
"temperature",
"top_p",
"stream",
"max_tokens",
"tools",
"tool_choice",
]
_check_valid_arg(supported_params=supported_params)
if temperature is not None:
optional_params["temperature"] = temperature
@ -4723,6 +4795,10 @@ def get_optional_params(
optional_params["stream"] = stream
if max_tokens is not None:
optional_params["max_tokens"] = max_tokens
if tools is not None:
optional_params["tools"] = tools
if tool_choice is not None:
optional_params["tool_choice"] = tool_choice
# check safe_mode, random_seed: https://docs.mistral.ai/api/#operation/createChatCompletion
safe_mode = passed_params.pop("safe_mode", None)
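With tools and tool_choice now forwarded for Mistral, a call along these lines should pass them through unchanged. This is a sketch; the tool schema is illustrative (OpenAI-style function tools, the format litellm accepts):

```python
import litellm

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",  # hypothetical tool
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name"}
                },
                "required": ["location"],
            },
        },
    }
]

response = litellm.completion(
    model="mistral/mistral-large-latest",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto",
)
```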
@ -6947,7 +7023,7 @@ def exception_type(
if "500 An internal error has occurred." in error_str:
exception_mapping_worked = True
raise APIError(
status_code=original_exception.status_code,
status_code=getattr(original_exception, "status_code", 500),
message=f"PalmException - {original_exception.message}",
llm_provider="palm",
model=model,
@ -8730,7 +8806,7 @@ class CustomStreamWrapper:
or original_chunk.choices[0].delta.tool_calls is not None
):
try:
delta = dict(original_chunk.choices[0].delta)
delta = original_chunk.choices[0].delta
model_response.system_fingerprint = (
original_chunk.system_fingerprint
)
@ -8765,7 +8841,9 @@ class CustomStreamWrapper:
is None
):
t.function.arguments = ""
model_response.choices[0].delta = Delta(**delta)
_json_delta = delta.model_dump()
print_verbose(f"_json_delta: {_json_delta}")
model_response.choices[0].delta = Delta(**_json_delta)
except Exception as e:
traceback.print_exc()
model_response.choices[0].delta = Delta()
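The switch from dict(delta) to delta.model_dump() matters because the chunk delta is a pydantic v2 model: dict() leaves nested models (e.g. tool calls) as model instances, while model_dump() recursively converts them to plain dicts that Delta(**...) can consume. A standalone sketch of the difference:

```python
from pydantic import BaseModel

class Function(BaseModel):
    name: str
    arguments: str

class ToolCall(BaseModel):
    function: Function

tc = ToolCall(function=Function(name="get_weather", arguments="{}"))

print(dict(tc))         # {'function': Function(name='get_weather', arguments='{}')} - nested model survives
print(tc.model_dump())  # {'function': {'name': 'get_weather', 'arguments': '{}'}} - plain nested dicts
```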

View file

@ -6,7 +6,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-4-turbo-preview": {
"max_tokens": 8192,
@ -15,7 +16,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-0314": {
"max_tokens": 8192,
@ -33,7 +36,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-4-32k": {
"max_tokens": 32768,
@ -69,7 +73,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-0125-preview": {
"max_tokens": 128000,
@ -78,7 +84,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-vision-preview": {
"max_tokens": 128000,
@ -105,7 +113,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4097,
@ -123,7 +132,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-3.5-turbo-1106": {
"max_tokens": 16385,
@ -132,7 +142,9 @@
"input_cost_per_token": 0.0000010,
"output_cost_per_token": 0.0000020,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-3.5-turbo-0125": {
"max_tokens": 16385,
@ -141,7 +153,9 @@
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000015,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16385,
@ -286,7 +300,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-1106-preview": {
"max_tokens": 128000,
@ -295,7 +311,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-0613": {
"max_tokens": 8192,
@ -304,7 +322,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-4-32k-0613": {
"max_tokens": 32768,
@ -331,7 +350,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-4-turbo": {
"max_tokens": 128000,
@ -340,7 +360,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-turbo-vision-preview": {
"max_tokens": 128000,
@ -358,7 +380,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-35-turbo-1106": {
"max_tokens": 16384,
@ -367,7 +390,20 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-35-turbo-0125": {
"max_tokens": 16384,
"max_input_tokens": 16384,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000015,
"litellm_provider": "azure",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-35-turbo-16k": {
"max_tokens": 16385,
@ -385,7 +421,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/ada": {
"max_tokens": 8191,
@ -514,11 +551,12 @@
"mode": "chat"
},
"mistral/mistral-large-latest": {
"max_tokens": 8192,
"max_tokens": 32000,
"input_cost_per_token": 0.000008,
"output_cost_per_token": 0.000024,
"litellm_provider": "mistral",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"mistral/mistral-embed": {
"max_tokens": 8192,
@ -676,7 +714,8 @@
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gemini-1.5-pro": {
"max_tokens": 8192,
@ -1738,6 +1777,23 @@
"output_cost_per_token": 0.0000009,
"litellm_provider": "together_ai"
},
"together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
"input_cost_per_token": 0.0000006,
"output_cost_per_token": 0.0000006,
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"together_ai/togethercomputer/CodeLlama-34b-Instruct": {
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"ollama/llama2": {
"max_tokens": 4096,
"input_cost_per_token": 0.0,
@ -1990,7 +2046,16 @@
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"anyscale/Mixtral-8x7B-Instruct-v0.1": {
"max_tokens": 16384,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat",
"supports_function_calling": true
},
"anyscale/HuggingFaceH4/zephyr-7b-beta": {
"max_tokens": 16384,

View file

@ -40,6 +40,8 @@ litellm_settings:
budget_duration: 30d
general_settings:
master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)
proxy_budget_rescheduler_min_time: 30
proxy_budget_rescheduler_max_time: 60
# database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy
environment_variables:

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.27.12"
version = "1.28.0"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.27.12"
version = "1.28.0"
version_files = [
"pyproject.toml:^version"
]

View file

@ -10,6 +10,7 @@ gunicorn==21.2.0 # server dep
boto3==1.34.34 # aws bedrock/sagemaker calls
redis==5.0.0 # caching
numpy==1.24.3 # semantic caching
pandas==2.1.1 # for viewing clickhouse spend analytics
prisma==0.11.0 # for db
mangum==0.17.0 # for aws lambda functions
google-generativeai==0.3.2 # for vertex ai calls

View file

@ -449,7 +449,7 @@ async def test_key_with_budgets():
reset_at_init_value = key_info["info"]["budget_reset_at"]
reset_at_new_value = None
i = 0
await asyncio.sleep(610)
await asyncio.sleep(120)
while i < 3:
key_info = await get_key_info(session=session, get_key=key, call_key=key)
reset_at_new_value = key_info["info"]["budget_reset_at"]
@ -490,6 +490,7 @@ async def test_key_crossing_budget():
assert "ExceededTokenBudget: Current spend for token:" in str(e)
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_key_info_spend_values_sagemaker():
"""

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a697f24d60c9c262.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a697f24d60c9c262.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/6920a121699cde9c.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[24143,[\"303\",\"static/chunks/303-d80f23087a9e6aec.js\",\"931\",\"static/chunks/app/page-d4fe4a48cbd3572c.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/6920a121699cde9c.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"eSwVwl_InIrhYtCAqDMKF\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[30280,[\"303\",\"static/chunks/303-d80f23087a9e6aec.js\",\"931\",\"static/chunks/app/page-8f65fc157f538dff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kyOCJPBB9pyUfbMKCAXr-\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[24143,["303","static/chunks/303-d80f23087a9e6aec.js","931","static/chunks/app/page-d4fe4a48cbd3572c.js"],""]
3:I[30280,["303","static/chunks/303-d80f23087a9e6aec.js","931","static/chunks/app/page-8f65fc157f538dff.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["eSwVwl_InIrhYtCAqDMKF",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/6920a121699cde9c.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["kyOCJPBB9pyUfbMKCAXr-",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a40ad0909dd7838e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -6,6 +6,7 @@ import UserDashboard from "../components/user_dashboard";
import ModelDashboard from "@/components/model_dashboard";
import ViewUserDashboard from "@/components/view_users";
import Teams from "@/components/teams";
import AdminPanel from "@/components/admins";
import ChatUI from "@/components/chat_ui";
import Sidebar from "../components/leftnav";
import Usage from "../components/usage";
@ -73,6 +74,10 @@ const CreateKeyPage = () => {
return "App Owner";
case "app_admin":
return "Admin";
case "proxy_admin":
return "Admin";
case "proxy_admin_viewer":
return "Admin Viewer";
case "app_user":
return "App User";
default:
@ -133,6 +138,12 @@ const CreateKeyPage = () => {
searchParams={searchParams}
accessToken={accessToken}
/>
) : page == "admin-panel" ? (
<AdminPanel
setTeams={setTeams}
searchParams={searchParams}
accessToken={accessToken}
/>
) : (
<Usage
userID={userID}

View file

@ -0,0 +1,228 @@
/**
* Allow proxy admin to add other people to view global spend
* Use this to avoid sharing master key with others
*/
import React, { useState, useEffect } from "react";
import { Typography } from "antd";
import {
Button as Button2,
Modal,
Form,
Input,
Select as Select2,
InputNumber,
message,
} from "antd";
import { Select, SelectItem } from "@tremor/react";
import {
Table,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Card,
Icon,
Button,
Col,
Text,
Grid,
} from "@tremor/react";
import { CogIcon } from "@heroicons/react/outline";
interface AdminPanelProps {
searchParams: any;
accessToken: string | null;
setTeams: React.Dispatch<React.SetStateAction<Object[] | null>>;
}
import {
userUpdateUserCall,
Member,
userGetAllUsersCall,
User,
} from "./networking";
const AdminPanel: React.FC<AdminPanelProps> = ({
searchParams,
accessToken,
}) => {
const [form] = Form.useForm();
const [memberForm] = Form.useForm();
const { Title, Paragraph } = Typography;
const [value, setValue] = useState("");
const [admins, setAdmins] = useState<null | any[]>(null);
const [isAddMemberModalVisible, setIsAddMemberModalVisible] = useState(false);
useEffect(() => {
// Fetch proxy admins and admin viewers to populate the table
const fetchProxyAdminInfo = async () => {
if (accessToken != null) {
const combinedList: any[] = [];
const proxyViewers = await userGetAllUsersCall(
accessToken,
"proxy_admin_viewer"
);
proxyViewers.forEach((viewer: User) => {
combinedList.push({
user_role: viewer.user_role,
user_id: viewer.user_id,
user_email: viewer.user_email,
});
});
console.log(`proxy viewers: ${proxyViewers}`);
const proxyAdmins = await userGetAllUsersCall(
accessToken,
"proxy_admin"
);
proxyAdmins.forEach((admin: User) => {
combinedList.push({
user_role: admin.user_role,
user_id: admin.user_id,
user_email: admin.user_email,
});
});
console.log(`proxy admins: ${proxyAdmins}`);
console.log(`combinedList: ${combinedList}`);
setAdmins(combinedList);
}
};
fetchProxyAdminInfo();
}, [accessToken]);
const handleMemberOk = () => {
setIsAddMemberModalVisible(false);
memberForm.resetFields();
};
const handleMemberCancel = () => {
setIsAddMemberModalVisible(false);
memberForm.resetFields();
};
const handleMemberCreate = async (formValues: Record<string, any>) => {
try {
if (accessToken != null && admins != null) {
message.info("Making API Call");
const user_role: Member = {
role: "user",
user_email: formValues.user_email,
user_id: formValues.user_id,
};
const response: any = await userUpdateUserCall(accessToken, formValues);
console.log(`response for user update call: ${response}`);
// Check if the user already exists in the list; update or add accordingly
const foundIndex = admins.findIndex((user) => {
console.log(
`user.user_id=${user.user_id}; response.user_id=${response.user_id}`
);
return user.user_id === response.user_id;
});
console.log(`foundIndex: ${foundIndex}`);
if (foundIndex == -1) {
console.log(`adding new admin viewer to the list`);
// User not in the list yet - add them (new array reference so React re-renders)
setAdmins([...admins, response]);
}
setIsAddMemberModalVisible(false);
}
} catch (error) {
console.error("Error creating the key:", error);
}
};
console.log(`admins: ${admins?.length}`);
return (
<div className="w-full m-2">
<Title level={4}>Proxy Admins</Title>
<Paragraph>
Add people who can only view global spend. They cannot create teams or
</Paragraph>
<Grid numItems={1} className="gap-2 p-0 w-full">
<Col numColSpan={1}>
<Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]">
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>Member Name</TableHeaderCell>
<TableHeaderCell>Role</TableHeaderCell>
<TableHeaderCell>Action</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{admins
? admins.map((member: any, index: number) => (
<TableRow key={index}>
<TableCell>
{member["user_email"]
? member["user_email"]
: member["user_id"]
? member["user_id"]
: null}
</TableCell>
<TableCell>{member["user_role"]}</TableCell>
<TableCell>
<Icon icon={CogIcon} size="sm" />
</TableCell>
</TableRow>
))
: null}
</TableBody>
</Table>
</Card>
</Col>
<Col numColSpan={1}>
<Button
className="mx-auto mb-5"
onClick={() => setIsAddMemberModalVisible(true)}
>
+ Add viewer
</Button>
<Modal
title="Add viewer"
visible={isAddMemberModalVisible}
width={800}
footer={null}
onOk={handleMemberOk}
onCancel={handleMemberCancel}
>
<Form
form={memberForm}
onFinish={handleMemberCreate}
labelCol={{ span: 8 }}
wrapperCol={{ span: 16 }}
labelAlign="left"
>
<>
<Form.Item label="Email" name="user_email" className="mb-4">
<Input
name="user_email"
className="px-3 py-2 border rounded-md w-full"
/>
</Form.Item>
<div className="text-center mb-4">OR</div>
<Form.Item label="User ID" name="user_id" className="mb-4">
<Input
name="user_id"
className="px-3 py-2 border rounded-md w-full"
/>
</Form.Item>
</>
<div style={{ textAlign: "right", marginTop: "10px" }}>
<Button2 htmlType="submit">Add member</Button2>
</div>
</Form>
</Modal>
</Col>
</Grid>
</div>
);
};
export default AdminPanel;

View file

@ -46,6 +46,11 @@ const Sidebar: React.FC<SidebarProps> = ({
Teams
</Menu.Item>
) : null}
{userRole == "Admin" ? (
<Menu.Item key="7" onClick={() => setPage("admin-panel")}>
Admin
</Menu.Item>
) : null}
</Menu>
</Sider>
</Layout>

View file

@ -313,6 +313,7 @@ export const userSpendLogsCall = async (
endTime: String
) => {
try {
console.log(`user role in spend logs call: ${userRole}`);
let url = proxyBaseUrl ? `${proxyBaseUrl}/spend/logs` : `/spend/logs`;
if (userRole == "App Owner") {
url = `${url}/?user_id=${userID}&start_date=${startTime}&end_date=${endTime}`;
@ -343,6 +344,96 @@ export const userSpendLogsCall = async (
}
};
export const adminSpendLogsCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/spend/logs`
: `/global/spend/logs`;
message.info("Making spend logs request");
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Spend Logs received");
return data;
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
};
export const adminTopKeysCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/spend/keys?limit=5`
: `/global/spend/keys?limit=5`;
message.info("Making spend keys request");
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Spend Logs received");
return data;
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
};
export const adminTopModelsCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/spend/models?limit=5`
: `/global/spend/models?limit=5`;
message.info("Making spend models request");
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Spend Logs received");
return data;
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
};
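These three dashboard calls map to plain GET endpoints on the proxy, so they can be exercised directly. A sketch, assuming the proxy runs locally on port 4000 with the sample master key from the config above:

```python
import requests

BASE = "http://0.0.0.0:4000"  # assumed local proxy address
HEADERS = {"Authorization": "Bearer sk-1234"}  # master key from the sample config

spend_logs = requests.get(f"{BASE}/global/spend/logs", headers=HEADERS).json()
top_keys = requests.get(f"{BASE}/global/spend/keys", params={"limit": 5}, headers=HEADERS).json()
top_models = requests.get(f"{BASE}/global/spend/models", params={"limit": 5}, headers=HEADERS).json()
```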
export const keyInfoCall = async (accessToken: String, keys: String[]) => {
try {
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
@ -468,6 +559,46 @@ export const userGetRequesedtModelsCall = async (accessToken: String) => {
}
};
export interface User {
user_role: string;
user_id: string;
user_email: string;
[key: string]: string; // Include any other potential keys in the dictionary
}
export const userGetAllUsersCall = async (
accessToken: String,
role: String
) => {
try {
const url = proxyBaseUrl
? `${proxyBaseUrl}/user/get_users?role=${role}`
: `/user/get_users?role=${role}`;
console.log("in userGetAllUsersCall:", url);
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error("Failed to delete key: " + errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Got all users");
return data;
// Handle success - you might want to update some state or UI based on the fetched users
} catch (error) {
console.error("Failed to get all users:", error);
throw error;
}
};
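userGetAllUsersCall wraps /user/get_users?role=...; the same query can be issued outside the dashboard. A sketch, with the same local-proxy assumptions as above:

```python
import requests

resp = requests.get(
    "http://0.0.0.0:4000/user/get_users",  # assumed local proxy address
    params={"role": "proxy_admin_viewer"},
    headers={"Authorization": "Bearer sk-1234"},  # master key from the sample config
)
resp.raise_for_status()
print(resp.json())
```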
export const teamCreateCall = async (
accessToken: string,
formValues: Record<string, any> // Assuming formValues is an object
@ -549,3 +680,41 @@ export const teamMemberAddCall = async (
throw error;
}
};
export const userUpdateUserCall = async (
accessToken: string,
formValues: any // Assuming formValues is an object
) => {
try {
console.log("Form Values in userUpdateUserCall:", formValues); // Log the form values before making the API call
const url = proxyBaseUrl ? `${proxyBaseUrl}/user/update` : `/user/update`;
const response = await fetch(url, {
method: "POST",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
user_role: "proxy_admin_viewer",
...formValues, // Include formValues in the request body
}),
});
if (!response.ok) {
const errorData = await response.text();
message.error("Failed to create key: " + errorData);
console.error("Error response from the server:", errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log("API Response:", data);
message.success("User role updated");
return data;
// Handle success - you might want to update some state or UI based on the updated user
} catch (error) {
console.error("Failed to update user:", error);
throw error;
}
};
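userUpdateUserCall POSTs to /user/update and defaults user_role to proxy_admin_viewer unless the form overrides it. A direct sketch under the same assumptions; the email is a placeholder:

```python
import requests

resp = requests.post(
    "http://0.0.0.0:4000/user/update",  # assumed local proxy address
    headers={"Authorization": "Bearer sk-1234"},  # master key from the sample config
    json={"user_role": "proxy_admin_viewer", "user_email": "viewer@example.com"},
)
print(resp.json())
```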

View file

@ -2,7 +2,13 @@ import { BarChart, Card, Title } from "@tremor/react";
import React, { useState, useEffect } from "react";
import { Grid, Col, Text, LineChart } from "@tremor/react";
import { userSpendLogsCall, keyInfoCall } from "./networking";
import {
userSpendLogsCall,
keyInfoCall,
adminSpendLogsCall,
adminTopKeysCall,
adminTopModelsCall,
} from "./networking";
import { start } from "repl";
interface UsagePageProps {
@ -164,29 +170,61 @@ const UsagePage: React.FC<UsagePageProps> = ({
if (accessToken && token && userRole && userID) {
const fetchData = async () => {
try {
await userSpendLogsCall(
accessToken,
token,
userRole,
userID,
startTime,
endTime
).then(async (response) => {
const topKeysResponse = await keyInfoCall(
accessToken,
getTopKeys(response)
);
const filtered_keys = topKeysResponse["info"].map((k: any) => ({
key: (k["key_name"] || k["key_alias"] || k["token"]).substring(
/**
* If user is Admin - query the global views endpoints
* If user is App Owner - use the normal spend logs call
*/
console.log(`user role: ${userRole}`);
if (userRole == "Admin") {
const overall_spend = await adminSpendLogsCall(accessToken);
setKeySpendData(overall_spend);
const top_keys = await adminTopKeysCall(accessToken);
const filtered_keys = top_keys.map((k: any) => ({
key: (k["key_name"] || k["key_alias"] || k["api_key"]).substring(
0,
7
),
spend: k["spend"],
spend: k["total_spend"],
}));
setTopKeys(filtered_keys);
setTopUsers(getTopUsers(response));
setKeySpendData(response);
});
const top_models = await adminTopModelsCall(accessToken);
} else if (userRole == "App Owner") {
await userSpendLogsCall(
accessToken,
token,
userRole,
userID,
startTime,
endTime
).then(async (response) => {
console.log("result from spend logs call", response);
if ("daily_spend" in response) {
// this response shape comes from the clickhouse analytics endpoint
let daily_spend = response["daily_spend"];
console.log("daily spend", daily_spend);
setKeySpendData(daily_spend);
let topApiKeys = response.top_api_keys;
setTopKeys(topApiKeys);
} else {
const topKeysResponse = await keyInfoCall(
accessToken,
getTopKeys(response)
);
const filtered_keys = topKeysResponse["info"].map((k: any) => ({
key: (
k["key_name"] ||
k["key_alias"] ||
k["token"]
).substring(0, 7),
spend: k["spend"],
}));
setTopKeys(filtered_keys);
setTopUsers(getTopUsers(response));
setKeySpendData(response);
}
});
}
} catch (error) {
console.error("There was an error fetching the data", error);
// Optionally, update your UI to reflect the error state here as well
@ -210,7 +248,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
valueFormatter={valueFormatter}
yAxisWidth={100}
tickGap={5}
customTooltip={customTooltip}
// customTooltip={customTooltip}
/>
</Card>
</Col>