Merge branch 'main' into litellm_selective_access_ui
commit 35a22e2247
55 changed files with 2284 additions and 1102 deletions
|
@ -130,6 +130,7 @@ jobs:
|
|||
pip install "langfuse>=2.0.0"
|
||||
pip install numpydoc
|
||||
pip install prisma
|
||||
pip install fastapi
|
||||
pip install "httpx==0.24.1"
|
||||
pip install "gunicorn==21.2.0"
|
||||
pip install "anyio==3.7.1"
|
||||
|
|
|
@ -1,18 +1,25 @@
|
|||
# Function Calling
|
||||
Function calling is supported with the following models on OpenAI and Azure OpenAI:
|
||||
|
||||
- gpt-4
|
||||
- gpt-4-1106-preview
|
||||
- gpt-4-0613
|
||||
- gpt-3.5-turbo
|
||||
- gpt-3.5-turbo-1106
|
||||
- gpt-3.5-turbo-0613
|
||||
- Non-OpenAI LLMs (LiteLLM adds the function call to the prompt for these LLMs)
|
||||
## Checking if a model supports function calling
|
||||
|
||||
In addition, parallel function calling is supported on the following models:
|
||||
- gpt-4-1106-preview
|
||||
- gpt-3.5-turbo-1106
|
||||
Use `litellm.supports_function_calling(model="")` -> returns `True` if the model supports function calling, `False` if not
|
||||
|
||||
```python
|
||||
assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
|
||||
assert litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
|
||||
assert litellm.supports_function_calling(model="palm/chat-bison") == False
|
||||
assert litellm.supports_function_calling(model="ollama/llama2") == False
|
||||
```
|
||||
|
||||
|
||||
## Checking if a model supports parallel function calling
|
||||
|
||||
Use `litellm.supports_parallel_function_calling(model="")` -> returns `True` if the model supports parallel function calling, `False` if not
|
||||
|
||||
```python
|
||||
assert litellm.supports_parallel_function_calling(model="gpt-4-turbo-preview") == True
|
||||
assert litellm.supports_parallel_function_calling(model="gpt-4") == False
|
||||
```
|
||||
## Parallel Function calling
|
||||
Parallel function calling is the model's ability to perform multiple function calls together, allowing the effects and results of these function calls to be resolved in parallel.
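
A minimal sketch of what a parallel tool-call round trip can look like (assumptions: an OpenAI API key is set in the environment, and the model decides to call the tool for this prompt; `get_current_weather` is a hypothetical tool used only for illustration):

```python
import litellm

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",  # hypothetical tool for illustration
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }
]

response = litellm.completion(
    model="gpt-3.5-turbo-1106",
    messages=[{"role": "user", "content": "What's the weather in Boston and in Paris?"}],
    tools=tools,
    tool_choice="auto",
)

# With parallel function calling, a single assistant message may contain
# multiple tool_calls (e.g. one per city) that can be executed concurrently.
for tool_call in response.choices[0].message.tool_calls or []:
    print(tool_call.function.name, tool_call.function.arguments)
```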
|
||||
|
||||
|
|
|
@ -291,7 +291,6 @@ Here's an example of using a bedrock model with LiteLLM
|
|||
| Anthropic Claude-V2.1 | `completion(model='bedrock/anthropic.claude-v2:1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
||||
| Anthropic Claude-V2 | `completion(model='bedrock/anthropic.claude-v2', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
||||
| Anthropic Claude-Instant V1 | `completion(model='bedrock/anthropic.claude-instant-v1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
||||
| Anthropic Claude-V1 | `completion(model='bedrock/anthropic.claude-v1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
||||
| Amazon Titan Lite | `completion(model='bedrock/amazon.titan-text-lite-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
||||
| Amazon Titan Express | `completion(model='bedrock/amazon.titan-text-express-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
||||
| Cohere Command | `completion(model='bedrock/cohere.command-text-v14', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
# VertexAI - Google [Gemini, Model Garden]
|
||||
|
||||
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_VertextAI_Example.ipynb">
|
||||
|
@ -22,8 +25,36 @@ response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "co
|
|||
|
||||
## OpenAI Proxy Usage
|
||||
|
||||
Here's how to use Vertex AI with the LiteLLM Proxy Server
|
||||
|
||||
1. Modify the config.yaml
|
||||
|
||||
<Tabs>
|
||||
|
||||
<TabItem value="completion_param" label="Different location per model">
|
||||
|
||||
Use this when you need to set a different location for each vertex model
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gemini-vision
|
||||
litellm_params:
|
||||
model: vertex_ai/gemini-1.0-pro-vision-001
|
||||
vertex_project: "project-id"
|
||||
vertex_location: "us-central1"
|
||||
- model_name: gemini-vision
|
||||
litellm_params:
|
||||
model: vertex_ai/gemini-1.0-pro-vision-001
|
||||
vertex_project: "project-id2"
|
||||
vertex_location: "us-east"
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="litellm_param" label="One location all vertex models">
|
||||
|
||||
Use this when you have one vertex location for all models
|
||||
|
||||
```yaml
|
||||
litellm_settings:
|
||||
vertex_project: "hardy-device-38811" # Your Project ID
|
||||
|
@ -35,6 +66,10 @@ model_list:
|
|||
model: gemini-pro
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
</Tabs>
|
||||
|
||||
2. Start the proxy
|
||||
|
||||
```bash
|
||||
|
|
44
docs/my-website/docs/proxy/metrics.md
Normal file
|
@ -0,0 +1,44 @@
|
|||
# 💸 GET Daily Spend, Usage Metrics
|
||||
|
||||
## Request Format
|
||||
```shell
|
||||
curl -X GET "http://0.0.0.0:4000/daily_metrics" -H "Authorization: Bearer sk-1234"
|
||||
```
|
||||
|
||||
## Response Format
|
||||
```json
|
||||
{
|
||||
"daily_spend": [
|
||||
{
|
||||
"daily_spend": 7.9261938052047e+16,
|
||||
"day": "2024-02-01T00:00:00",
|
||||
"spend_per_model": {"azure/gpt-4": 7.9261938052047e+16},
|
||||
"spend_per_api_key": {
|
||||
"76": 914495704992000.0,
|
||||
"12": 905726697912000.0,
|
||||
"71": 866312628003000.0,
|
||||
"28": 865461799332000.0,
|
||||
"13": 859151538396000.0
|
||||
}
|
||||
},
|
||||
{
|
||||
"daily_spend": 7.938489251309491e+16,
|
||||
"day": "2024-02-02T00:00:00",
|
||||
"spend_per_model": {"gpt-3.5": 7.938489251309491e+16},
|
||||
"spend_per_api_key": {
|
||||
"91": 896805036036000.0,
|
||||
"78": 889692646082000.0,
|
||||
"49": 885386687861000.0,
|
||||
"28": 873869890984000.0,
|
||||
"56": 867398637692000.0
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"total_spend": 200,
|
||||
"top_models": {"gpt4": 0.2, "vertexai/gemini-pro": 10},
|
||||
"top_api_keys": {"899922": 0.9, "838hcjd999seerr88": 20}
|
||||
|
||||
}
|
||||
|
||||
```
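
A minimal sketch of the same request from Python, assuming the proxy is running locally as above and the `httpx` package is installed:

```python
import httpx

# Same call as the curl example above; the URL and key are the doc's placeholders.
resp = httpx.get(
    "http://0.0.0.0:4000/daily_metrics",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()

metrics = resp.json()
for day in metrics["daily_spend"]:
    print(day["day"], day["daily_spend"])
```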
|
|
@ -40,6 +40,7 @@ const sidebars = {
|
|||
"proxy/virtual_keys",
|
||||
"proxy/users",
|
||||
"proxy/ui",
|
||||
"proxy/metrics",
|
||||
"proxy/model_management",
|
||||
"proxy/health",
|
||||
"proxy/debugging",
|
||||
|
|
|
@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
|
|||
"log_count": num_rows,
|
||||
}
|
||||
return response_data
|
||||
|
||||
|
||||
def _create_clickhouse_material_views(client=None, table_names=[]):
|
||||
# Create Materialized Views if they don't exist
|
||||
# Materialized Views send new inserted rows to the aggregate tables
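# (i.e. every insert into spend_logs is also rolled up into the daily_aggregated_spend* tables below)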
|
||||
|
||||
verbose_logger.debug("Clickhouse: Creating Materialized Views")
|
||||
if "daily_aggregated_spend_per_model_mv" not in table_names:
|
||||
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
|
||||
client.command(
|
||||
"""
|
||||
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
|
||||
TO daily_aggregated_spend_per_model
|
||||
AS
|
||||
SELECT
|
||||
toDate(startTime) as day,
|
||||
sumState(spend) AS DailySpend,
|
||||
model as model
|
||||
FROM spend_logs
|
||||
GROUP BY
|
||||
day, model
|
||||
"""
|
||||
)
|
||||
if "daily_aggregated_spend_per_api_key_mv" not in table_names:
|
||||
verbose_logger.debug(
|
||||
"Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
|
||||
)
|
||||
client.command(
|
||||
"""
|
||||
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
|
||||
TO daily_aggregated_spend_per_api_key
|
||||
AS
|
||||
SELECT
|
||||
toDate(startTime) as day,
|
||||
sumState(spend) AS DailySpend,
|
||||
api_key as api_key
|
||||
FROM spend_logs
|
||||
GROUP BY
|
||||
day, api_key
|
||||
"""
|
||||
)
|
||||
if "daily_aggregated_spend_per_user_mv" not in table_names:
|
||||
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
|
||||
client.command(
|
||||
"""
|
||||
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
|
||||
TO daily_aggregated_spend_per_user
|
||||
AS
|
||||
SELECT
|
||||
toDate(startTime) as day,
|
||||
sumState(spend) AS DailySpend,
|
||||
user as user
|
||||
FROM spend_logs
|
||||
GROUP BY
|
||||
day, user
|
||||
"""
|
||||
)
|
||||
if "daily_aggregated_spend_mv" not in table_names:
|
||||
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
|
||||
client.command(
|
||||
"""
|
||||
CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
|
||||
TO daily_aggregated_spend
|
||||
AS
|
||||
SELECT
|
||||
toDate(startTime) as day,
|
||||
sumState(spend) AS DailySpend
|
||||
FROM spend_logs
|
||||
GROUP BY
|
||||
day
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
|
||||
# Basic Logging works without this - this is only used for low latency reporting apis
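# Each table stores partial sums in an AggregateFunction(sum, Float64) column; totals are read back with sumMerge() (see build_daily_metrics)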
|
||||
verbose_logger.debug("Clickhouse: Creating Aggregate Tables")
|
||||
|
||||
# Create Aggregate Tables if they don't exist
|
||||
if "daily_aggregated_spend_per_model" not in table_names:
|
||||
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
|
||||
client.command(
|
||||
"""
|
||||
CREATE TABLE daily_aggregated_spend_per_model
|
||||
(
|
||||
`day` Date,
|
||||
`DailySpend` AggregateFunction(sum, Float64),
|
||||
`model` String
|
||||
)
|
||||
ENGINE = SummingMergeTree()
|
||||
ORDER BY (day, model);
|
||||
"""
|
||||
)
|
||||
if "daily_aggregated_spend_per_api_key" not in table_names:
|
||||
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
|
||||
client.command(
|
||||
"""
|
||||
CREATE TABLE daily_aggregated_spend_per_api_key
|
||||
(
|
||||
`day` Date,
|
||||
`DailySpend` AggregateFunction(sum, Float64),
|
||||
`api_key` String
|
||||
)
|
||||
ENGINE = SummingMergeTree()
|
||||
ORDER BY (day, api_key);
|
||||
"""
|
||||
)
|
||||
if "daily_aggregated_spend_per_user" not in table_names:
|
||||
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
|
||||
client.command(
|
||||
"""
|
||||
CREATE TABLE daily_aggregated_spend_per_user
|
||||
(
|
||||
`day` Date,
|
||||
`DailySpend` AggregateFunction(sum, Float64),
|
||||
`user` String
|
||||
)
|
||||
ENGINE = SummingMergeTree()
|
||||
ORDER BY (day, user);
|
||||
"""
|
||||
)
|
||||
if "daily_aggregated_spend" not in table_names:
|
||||
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
|
||||
client.command(
|
||||
"""
|
||||
CREATE TABLE daily_aggregated_spend
|
||||
(
|
||||
`day` Date,
|
||||
`DailySpend` AggregateFunction(sum, Float64),
|
||||
)
|
||||
ENGINE = SummingMergeTree()
|
||||
ORDER BY (day);
|
||||
"""
|
||||
)
|
||||
return
|
||||
|
|
|
@ -549,6 +549,8 @@ from .utils import (
|
|||
token_counter,
|
||||
cost_per_token,
|
||||
completion_cost,
|
||||
supports_function_calling,
|
||||
supports_parallel_function_calling,
|
||||
get_litellm_params,
|
||||
Logging,
|
||||
acreate,
|
||||
|
|
|
@ -27,6 +27,151 @@ import litellm, uuid
|
|||
from litellm._logging import print_verbose, verbose_logger
|
||||
|
||||
|
||||
def create_client():
|
||||
try:
|
||||
import clickhouse_connect
|
||||
|
||||
port = os.getenv("CLICKHOUSE_PORT")
|
||||
clickhouse_host = os.getenv("CLICKHOUSE_HOST")
|
||||
if clickhouse_host is not None:
|
||||
verbose_logger.debug("setting up clickhouse")
|
||||
if port is not None and isinstance(port, str):
|
||||
port = int(port)
|
||||
|
||||
client = clickhouse_connect.get_client(
|
||||
host=os.getenv("CLICKHOUSE_HOST"),
|
||||
port=port,
|
||||
username=os.getenv("CLICKHOUSE_USERNAME"),
|
||||
password=os.getenv("CLICKHOUSE_PASSWORD"),
|
||||
)
|
||||
return client
|
||||
else:
|
||||
raise Exception("Clickhouse: Clickhouse host not set")
|
||||
except Exception as e:
|
||||
raise ValueError(f"Clickhouse: {e}")
|
||||
|
||||
|
||||
def build_daily_metrics():
|
||||
click_house_client = create_client()
|
||||
|
||||
# get daily spend
|
||||
daily_spend = click_house_client.query_df(
|
||||
"""
|
||||
SELECT sumMerge(DailySpend) as daily_spend, day FROM daily_aggregated_spend GROUP BY day
|
||||
"""
|
||||
)
|
||||
|
||||
# get daily spend per model
|
||||
daily_spend_per_model = click_house_client.query_df(
|
||||
"""
|
||||
SELECT sumMerge(DailySpend) as daily_spend, day, model FROM daily_aggregated_spend_per_model GROUP BY day, model
|
||||
"""
|
||||
)
|
||||
new_df = daily_spend_per_model.to_dict(orient="records")
|
||||
import pandas as pd
|
||||
|
||||
df = pd.DataFrame(new_df)
|
||||
# Group by 'day' and create a dictionary for each group
|
||||
result_dict = {}
|
||||
for day, group in df.groupby("day"):
|
||||
models = group["model"].tolist()
|
||||
spend = group["daily_spend"].tolist()
|
||||
spend_per_model = {model: spend for model, spend in zip(models, spend)}
|
||||
result_dict[day] = spend_per_model
|
||||
|
||||
# Display the resulting dictionary
|
||||
|
||||
# get daily spend per API key
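# (the window query below keeps only the top 5 keys per day via the spend_rank <= 5 filter)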
|
||||
daily_spend_per_api_key = click_house_client.query_df(
|
||||
"""
|
||||
SELECT
|
||||
daily_spend,
|
||||
day,
|
||||
api_key
|
||||
FROM (
|
||||
SELECT
|
||||
sumMerge(DailySpend) as daily_spend,
|
||||
day,
|
||||
api_key,
|
||||
RANK() OVER (PARTITION BY day ORDER BY sumMerge(DailySpend) DESC) as spend_rank
|
||||
FROM
|
||||
daily_aggregated_spend_per_api_key
|
||||
GROUP BY
|
||||
day,
|
||||
api_key
|
||||
) AS ranked_api_keys
|
||||
WHERE
|
||||
spend_rank <= 5
|
||||
AND day IS NOT NULL
|
||||
ORDER BY
|
||||
day,
|
||||
daily_spend DESC
|
||||
"""
|
||||
)
|
||||
new_df = daily_spend_per_api_key.to_dict(orient="records")
|
||||
import pandas as pd
|
||||
|
||||
df = pd.DataFrame(new_df)
|
||||
# Group by 'day' and create a dictionary for each group
|
||||
api_key_result_dict = {}
|
||||
for day, group in df.groupby("day"):
|
||||
api_keys = group["api_key"].tolist()
|
||||
spend = group["daily_spend"].tolist()
|
||||
spend_per_api_key = {api_key: spend for api_key, spend in zip(api_keys, spend)}
|
||||
api_key_result_dict[day] = spend_per_api_key
|
||||
|
||||
# Display the resulting dictionary
|
||||
|
||||
# Calculate total spend across all days
|
||||
total_spend = daily_spend["daily_spend"].sum()
|
||||
|
||||
# Identify top models and top API keys with the highest spend across all days
|
||||
top_models = {}
|
||||
top_api_keys = {}
|
||||
|
||||
for day, spend_per_model in result_dict.items():
|
||||
for model, model_spend in spend_per_model.items():
|
||||
if model not in top_models or model_spend > top_models[model]:
|
||||
top_models[model] = model_spend
|
||||
|
||||
for day, spend_per_api_key in api_key_result_dict.items():
|
||||
for api_key, api_key_spend in spend_per_api_key.items():
|
||||
if api_key not in top_api_keys or api_key_spend > top_api_keys[api_key]:
|
||||
top_api_keys[api_key] = api_key_spend
|
||||
|
||||
# for each day in daily spend, look up the day in result_dict and api_key_result_dict
|
||||
# Assuming daily_spend DataFrame has 'day' column
|
||||
result = []
|
||||
for index, row in daily_spend.iterrows():
|
||||
day = row["day"]
|
||||
data_day = row.to_dict()
|
||||
|
||||
# Look up in result_dict
|
||||
if day in result_dict:
|
||||
spend_per_model = result_dict[day]
|
||||
# Assuming there is a column named 'model' in daily_spend
|
||||
data_day["spend_per_model"] = spend_per_model # Assign 0 if model not found
|
||||
|
||||
# Look up in api_key_result_dict
|
||||
if day in api_key_result_dict:
|
||||
spend_per_api_key = api_key_result_dict[day]
|
||||
# Assuming there is a column named 'api_key' in daily_spend
|
||||
data_day["spend_per_api_key"] = spend_per_api_key
|
||||
|
||||
result.append(data_day)
|
||||
|
||||
data_to_return = {}
|
||||
data_to_return["daily_spend"] = result
|
||||
|
||||
data_to_return["total_spend"] = total_spend
|
||||
data_to_return["top_models"] = top_models
|
||||
data_to_return["top_api_keys"] = top_api_keys
|
||||
return data_to_return
|
||||
|
||||
|
||||
# build_daily_metrics()
|
||||
|
||||
|
||||
def _start_clickhouse():
|
||||
import clickhouse_connect
|
||||
|
||||
|
@ -86,6 +231,14 @@ def _start_clickhouse():
|
|||
response = client.query("DESCRIBE default.spend_logs")
|
||||
verbose_logger.debug(f"spend logs schema ={response.result_rows}")
|
||||
|
||||
# RUN Enterprise Clickhouse Setup
|
||||
# TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
|
||||
from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
|
||||
from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
|
||||
|
||||
_create_clickhouse_aggregate_tables(client=client, table_names=table_names)
|
||||
_create_clickhouse_material_views(client=client, table_names=table_names)
|
||||
|
||||
|
||||
class ClickhouseLogger:
|
||||
# Class variables or attributes
|
||||
|
|
|
@ -278,7 +278,11 @@ def completion(
|
|||
import google.auth
|
||||
|
||||
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
|
||||
print_verbose(
|
||||
f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
|
||||
)
|
||||
creds, _ = google.auth.default(quota_project_id=vertex_project)
|
||||
print_verbose(f"VERTEX AI: creds={creds}")
|
||||
vertexai.init(
|
||||
project=vertex_project, location=vertex_location, credentials=creds
|
||||
)
|
||||
|
|
|
@ -1467,12 +1467,14 @@ def completion(
|
|||
response = model_response
|
||||
elif custom_llm_provider == "vertex_ai":
|
||||
vertex_ai_project = (
|
||||
optional_params.pop("vertex_ai_project", None)
|
||||
optional_params.pop("vertex_project", None)
|
||||
or optional_params.pop("vertex_ai_project", None)
|
||||
or litellm.vertex_project
|
||||
or get_secret("VERTEXAI_PROJECT")
|
||||
)
|
||||
vertex_ai_location = (
|
||||
optional_params.pop("vertex_ai_location", None)
|
||||
optional_params.pop("vertex_location", None)
|
||||
or optional_params.pop("vertex_ai_location", None)
|
||||
or litellm.vertex_location
|
||||
or get_secret("VERTEXAI_LOCATION")
|
||||
)
|
||||
|
@ -2566,12 +2568,14 @@ def embedding(
|
|||
)
|
||||
elif custom_llm_provider == "vertex_ai":
|
||||
vertex_ai_project = (
|
||||
optional_params.pop("vertex_ai_project", None)
|
||||
optional_params.pop("vertex_project", None)
|
||||
or optional_params.pop("vertex_ai_project", None)
|
||||
or litellm.vertex_project
|
||||
or get_secret("VERTEXAI_PROJECT")
|
||||
)
|
||||
vertex_ai_location = (
|
||||
optional_params.pop("vertex_ai_location", None)
|
||||
optional_params.pop("vertex_location", None)
|
||||
or optional_params.pop("vertex_ai_location", None)
|
||||
or litellm.vertex_location
|
||||
or get_secret("VERTEXAI_LOCATION")
|
||||
)
|
||||
|
|
|
@ -6,7 +6,8 @@
|
|||
"input_cost_per_token": 0.00003,
|
||||
"output_cost_per_token": 0.00006,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"gpt-4-turbo-preview": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -15,7 +16,9 @@
|
|||
"input_cost_per_token": 0.00001,
|
||||
"output_cost_per_token": 0.00003,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"gpt-4-0314": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -33,7 +36,8 @@
|
|||
"input_cost_per_token": 0.00003,
|
||||
"output_cost_per_token": 0.00006,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"gpt-4-32k": {
|
||||
"max_tokens": 32768,
|
||||
|
@ -69,7 +73,9 @@
|
|||
"input_cost_per_token": 0.00001,
|
||||
"output_cost_per_token": 0.00003,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"gpt-4-0125-preview": {
|
||||
"max_tokens": 128000,
|
||||
|
@ -78,7 +84,9 @@
|
|||
"input_cost_per_token": 0.00001,
|
||||
"output_cost_per_token": 0.00003,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"gpt-4-vision-preview": {
|
||||
"max_tokens": 128000,
|
||||
|
@ -105,7 +113,8 @@
|
|||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"gpt-3.5-turbo-0301": {
|
||||
"max_tokens": 4097,
|
||||
|
@ -123,7 +132,8 @@
|
|||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"gpt-3.5-turbo-1106": {
|
||||
"max_tokens": 16385,
|
||||
|
@ -132,7 +142,9 @@
|
|||
"input_cost_per_token": 0.0000010,
|
||||
"output_cost_per_token": 0.0000020,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"gpt-3.5-turbo-0125": {
|
||||
"max_tokens": 16385,
|
||||
|
@ -141,7 +153,9 @@
|
|||
"input_cost_per_token": 0.0000005,
|
||||
"output_cost_per_token": 0.0000015,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"gpt-3.5-turbo-16k": {
|
||||
"max_tokens": 16385,
|
||||
|
@ -286,7 +300,9 @@
|
|||
"input_cost_per_token": 0.00001,
|
||||
"output_cost_per_token": 0.00003,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"azure/gpt-4-1106-preview": {
|
||||
"max_tokens": 128000,
|
||||
|
@ -295,7 +311,9 @@
|
|||
"input_cost_per_token": 0.00001,
|
||||
"output_cost_per_token": 0.00003,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"azure/gpt-4-0613": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -304,7 +322,8 @@
|
|||
"input_cost_per_token": 0.00003,
|
||||
"output_cost_per_token": 0.00006,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"azure/gpt-4-32k-0613": {
|
||||
"max_tokens": 32768,
|
||||
|
@ -331,7 +350,8 @@
|
|||
"input_cost_per_token": 0.00003,
|
||||
"output_cost_per_token": 0.00006,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"azure/gpt-4-turbo": {
|
||||
"max_tokens": 128000,
|
||||
|
@ -340,7 +360,9 @@
|
|||
"input_cost_per_token": 0.00001,
|
||||
"output_cost_per_token": 0.00003,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"azure/gpt-4-turbo-vision-preview": {
|
||||
"max_tokens": 128000,
|
||||
|
@ -358,7 +380,8 @@
|
|||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000004,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"azure/gpt-35-turbo-1106": {
|
||||
"max_tokens": 16384,
|
||||
|
@ -367,7 +390,20 @@
|
|||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"azure/gpt-35-turbo-0125": {
|
||||
"max_tokens": 16384,
|
||||
"max_input_tokens": 16384,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"output_cost_per_token": 0.0000015,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"azure/gpt-35-turbo-16k": {
|
||||
"max_tokens": 16385,
|
||||
|
@ -385,7 +421,8 @@
|
|||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"azure/ada": {
|
||||
"max_tokens": 8191,
|
||||
|
@ -514,11 +551,12 @@
|
|||
"mode": "chat"
|
||||
},
|
||||
"mistral/mistral-large-latest": {
|
||||
"max_tokens": 8192,
|
||||
"max_tokens": 32000,
|
||||
"input_cost_per_token": 0.000008,
|
||||
"output_cost_per_token": 0.000024,
|
||||
"litellm_provider": "mistral",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"mistral/mistral-embed": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -676,7 +714,8 @@
|
|||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"gemini-1.5-pro": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -687,6 +726,15 @@
|
|||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.5-pro-preview-0215": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1000000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-pro-vision": {
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
|
@ -1729,6 +1777,23 @@
|
|||
"output_cost_per_token": 0.0000009,
|
||||
"litellm_provider": "together_ai"
|
||||
},
|
||||
"together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
|
||||
"input_cost_per_token": 0.0000006,
|
||||
"output_cost_per_token": 0.0000006,
|
||||
"litellm_provider": "together_ai",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
|
||||
"litellm_provider": "together_ai",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"together_ai/togethercomputer/CodeLlama-34b-Instruct": {
|
||||
"litellm_provider": "together_ai",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true
|
||||
},
|
||||
"ollama/llama2": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.0,
|
||||
|
@ -1981,7 +2046,16 @@
|
|||
"input_cost_per_token": 0.00000015,
|
||||
"output_cost_per_token": 0.00000015,
|
||||
"litellm_provider": "anyscale",
|
||||
"mode": "chat"
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"anyscale/Mixtral-8x7B-Instruct-v0.1": {
|
||||
"max_tokens": 16384,
|
||||
"input_cost_per_token": 0.00000015,
|
||||
"output_cost_per_token": 0.00000015,
|
||||
"litellm_provider": "anyscale",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"anyscale/HuggingFaceH4/zephyr-7b-beta": {
|
||||
"max_tokens": 16384,
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
|||
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
|
|
@ -0,0 +1 @@
|
|||
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()
|
66
litellm/proxy/custom_callbacks.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
from litellm.integrations.custom_logger import CustomLogger
|
||||
import litellm
|
||||
|
||||
|
||||
# This file includes the custom callbacks for LiteLLM Proxy
|
||||
# Once defined, these can be passed in proxy_config.yaml
|
||||
class MyCustomHandler(CustomLogger):
|
||||
def log_pre_api_call(self, model, messages, kwargs):
|
||||
print(f"Pre-API Call") # noqa
|
||||
|
||||
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
|
||||
print(f"Post-API Call") # noqa
|
||||
|
||||
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
|
||||
print(f"On Stream") # noqa
|
||||
|
||||
def log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||
print("On Success") # noqa
|
||||
|
||||
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
||||
print(f"On Failure") # noqa
|
||||
|
||||
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||
print(f"ishaan async_log_success_event") # noqa
|
||||
# log: key, user, model, prompt, response, tokens, cost
|
||||
# Access kwargs passed to litellm.completion()
|
||||
model = kwargs.get("model", None)
|
||||
messages = kwargs.get("messages", None)
|
||||
user = kwargs.get("user", None)
|
||||
|
||||
# Access litellm_params passed to litellm.completion(), example access `metadata`
|
||||
litellm_params = kwargs.get("litellm_params", {})
|
||||
metadata = litellm_params.get(
|
||||
"metadata", {}
|
||||
) # headers passed to LiteLLM proxy, can be found here
|
||||
|
||||
return
|
||||
|
||||
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
||||
try:
|
||||
print(f"On Async Failure !") # noqa
|
||||
print("\nkwargs", kwargs) # noqa
|
||||
# Access kwargs passed to litellm.completion()
|
||||
model = kwargs.get("model", None)
|
||||
messages = kwargs.get("messages", None)
|
||||
user = kwargs.get("user", None)
|
||||
|
||||
# Access litellm_params passed to litellm.completion(), example access `metadata`
|
||||
litellm_params = kwargs.get("litellm_params", {})
|
||||
metadata = litellm_params.get(
|
||||
"metadata", {}
|
||||
) # headers passed to LiteLLM proxy, can be found here
|
||||
|
||||
# Access Exceptions & Traceback
|
||||
exception_event = kwargs.get("exception", None)
|
||||
traceback_event = kwargs.get("traceback_exception", None)
|
||||
|
||||
# Calculate cost using litellm.completion_cost()
|
||||
except Exception as e:
|
||||
print(f"Exception: {e}") # noqa
|
||||
|
||||
|
||||
proxy_handler_instance = MyCustomHandler()
|
||||
|
||||
# Set litellm.callbacks = [proxy_handler_instance] on the proxy
|
||||
# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy
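
For reference, a minimal sketch of wiring this handler up when calling LiteLLM directly (outside the proxy); the import path is hypothetical and depends on where this file lives:

```python
# Minimal sketch, not part of the proxy wiring above.
import litellm
from custom_callbacks import proxy_handler_instance  # hypothetical import path

litellm.callbacks = [proxy_handler_instance]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hello",  # avoids a real API call while exercising the callbacks
)
print(response)
```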
|
|
@ -45,7 +45,7 @@ litellm_settings:
|
|||
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
|
||||
success_callback: ['langfuse']
|
||||
# setting callback class
|
||||
# callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
|
||||
callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
|
||||
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
|
|
|
@ -240,6 +240,8 @@ health_check_results = {}
|
|||
queue: List = []
|
||||
litellm_proxy_budget_name = "litellm-proxy-budget"
|
||||
ui_access_mode: Literal["admin", "all"] = "all"
|
||||
proxy_budget_rescheduler_min_time = 597
|
||||
proxy_budget_rescheduler_max_time = 605
|
||||
### INITIALIZE GLOBAL LOGGING OBJECT ###
|
||||
proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
|
||||
### REDIS QUEUE ###
|
||||
|
@ -1407,7 +1409,7 @@ class ProxyConfig:
|
|||
"""
|
||||
Load config values into proxy global state
|
||||
"""
|
||||
global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, ui_access_mode
|
||||
global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time, ui_access_mode
|
||||
|
||||
# Load existing config
|
||||
config = await self.get_config(config_file_path=config_file_path)
|
||||
|
@ -1718,6 +1720,13 @@ class ProxyConfig:
|
|||
ui_access_mode = general_settings.get(
|
||||
"ui_access_mode", "all"
|
||||
) # can be either ["admin_only" or "all"]
|
||||
## BUDGET RESCHEDULER ##
|
||||
proxy_budget_rescheduler_min_time = general_settings.get(
|
||||
"proxy_budget_rescheduler_min_time", proxy_budget_rescheduler_min_time
|
||||
)
|
||||
proxy_budget_rescheduler_max_time = general_settings.get(
|
||||
"proxy_budget_rescheduler_max_time", proxy_budget_rescheduler_max_time
|
||||
)
|
||||
### BACKGROUND HEALTH CHECKS ###
|
||||
# Enable background health checks
|
||||
use_background_health_checks = general_settings.get(
|
||||
|
@ -2120,10 +2129,9 @@ async def async_data_generator(response, user_api_key_dict):
|
|||
try:
|
||||
start_time = time.time()
|
||||
async for chunk in response:
|
||||
verbose_proxy_logger.debug(f"returned chunk: {chunk}")
|
||||
assert isinstance(chunk, litellm.ModelResponse)
|
||||
chunk = chunk.model_dump_json(exclude_none=True)
|
||||
try:
|
||||
yield f"data: {json.dumps(chunk.model_dump(exclude_none=True))}\n\n"
|
||||
yield f"data: {chunk}\n\n"
|
||||
except Exception as e:
|
||||
yield f"data: {str(e)}\n\n"
|
||||
|
||||
|
@ -2202,7 +2210,7 @@ def parse_cache_control(cache_control):
|
|||
|
||||
@router.on_event("startup")
|
||||
async def startup_event():
|
||||
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings
|
||||
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
|
||||
import json
|
||||
|
||||
### LOAD MASTER KEY ###
|
||||
|
@ -2307,13 +2315,12 @@ async def startup_event():
|
|||
### CHECK IF VIEW EXISTS ###
|
||||
if prisma_client is not None:
|
||||
create_view_response = await prisma_client.check_view_exists()
|
||||
print(f"create_view_response: {create_view_response}") # noqa
|
||||
|
||||
### START BUDGET SCHEDULER ###
|
||||
if prisma_client is not None:
|
||||
scheduler = AsyncIOScheduler()
|
||||
interval = random.randint(
|
||||
597, 605
|
||||
proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
|
||||
) # random interval, so multiple workers avoid resetting budget at the same time
|
||||
scheduler.add_job(
|
||||
reset_budget, "interval", seconds=interval, args=[prisma_client]
|
||||
|
@ -3780,7 +3787,7 @@ async def view_spend_tags(
|
|||
|
||||
@router.get(
|
||||
"/spend/logs",
|
||||
tags=["budget & spend Tracking"],
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
|
@ -3839,13 +3846,55 @@ async def view_spend_logs(
|
|||
# getting spend logs from clickhouse
|
||||
from litellm.proxy.enterprise.utils import view_spend_logs_from_clickhouse
|
||||
|
||||
return await view_spend_logs_from_clickhouse(
|
||||
api_key=api_key,
|
||||
user_id=user_id,
|
||||
request_id=request_id,
|
||||
daily_metrics = await view_daily_metrics(
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# get the top api keys across all daily_metrics
|
||||
top_api_keys = {} # type: ignore
|
||||
|
||||
# make this compatible with the admin UI
|
||||
for response in daily_metrics.get("daily_spend", {}):
|
||||
response["startTime"] = response["day"]
|
||||
response["spend"] = response["daily_spend"]
|
||||
response["models"] = response["spend_per_model"]
|
||||
response["users"] = {"ishaan": 0.0}
|
||||
spend_per_api_key = response["spend_per_api_key"]
|
||||
|
||||
# insert spend_per_api_key key, values in response
|
||||
for key, value in spend_per_api_key.items():
|
||||
response[key] = value
|
||||
top_api_keys[key] = top_api_keys.get(key, 0.0) + value
|
||||
|
||||
del response["day"]
|
||||
del response["daily_spend"]
|
||||
del response["spend_per_model"]
|
||||
del response["spend_per_api_key"]
|
||||
|
||||
# get top 5 api keys
|
||||
top_api_keys = sorted(top_api_keys.items(), key=lambda x: x[1], reverse=True) # type: ignore
|
||||
top_api_keys = top_api_keys[:5] # type: ignore
|
||||
top_api_keys = dict(top_api_keys) # type: ignore
|
||||
"""
|
||||
set it like this
|
||||
{
|
||||
"key" : key,
|
||||
"spend:" : spend
|
||||
}
|
||||
"""
|
||||
# we need this to show on the Admin UI
|
||||
response_keys = []
|
||||
for key in top_api_keys.items():
|
||||
response_keys.append(
|
||||
{
|
||||
"key": key[0],
|
||||
"spend": key[1],
|
||||
}
|
||||
)
|
||||
daily_metrics["top_api_keys"] = response_keys
|
||||
|
||||
return daily_metrics
|
||||
global prisma_client
|
||||
try:
|
||||
verbose_proxy_logger.debug("inside view_spend_logs")
|
||||
|
@ -3998,6 +4047,142 @@ async def view_spend_logs(
|
|||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/global/spend/logs",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def global_spend_logs():
|
||||
"""
|
||||
[BETA] This is a beta endpoint. It will change.
|
||||
|
||||
Use this to get global spend (spend per day for the last 30d). Admin-only endpoint.
|
||||
|
||||
More efficient implementation of /spend/logs, by creating a view over the spend logs table.
|
||||
"""
|
||||
global prisma_client
|
||||
|
||||
sql_query = """SELECT * FROM "MonthlyGlobalSpend";"""
|
||||
|
||||
response = await prisma_client.db.query_raw(query=sql_query)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
@router.get(
|
||||
"/global/spend/keys",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def global_spend_keys(
|
||||
limit: int = fastapi.Query(
|
||||
default=None,
|
||||
description="Number of keys to get. Will return Top 'n' keys.",
|
||||
)
|
||||
):
|
||||
"""
|
||||
[BETA] This is a beta endpoint. It will change.
|
||||
|
||||
Use this to get the top 'n' keys with the highest spend, ordered by spend.
|
||||
"""
|
||||
global prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No db connected"})
|
||||
sql_query = f"""SELECT * FROM "Last30dKeysBySpend" LIMIT {limit};"""
|
||||
|
||||
response = await prisma_client.db.query_raw(query=sql_query)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
@router.get(
|
||||
"/global/spend/models",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def global_spend_models(
|
||||
limit: int = fastapi.Query(
|
||||
default=None,
|
||||
description="Number of models to get. Will return Top 'n' models.",
|
||||
)
|
||||
):
|
||||
"""
|
||||
[BETA] This is a beta endpoint. It will change.
|
||||
|
||||
Use this to get the top 'n' models with the highest spend, ordered by spend.
|
||||
"""
|
||||
global prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No db connected"})
|
||||
|
||||
sql_query = f"""SELECT * FROM "Last30dModelsBySpend" LIMIT {limit};"""
|
||||
|
||||
response = await prisma_client.db.query_raw(query=sql_query)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
@router.get(
|
||||
"/daily_metrics",
|
||||
summary="Get daily spend metrics",
|
||||
tags=["budget & spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def view_daily_metrics(
|
||||
start_date: Optional[str] = fastapi.Query(
|
||||
default=None,
|
||||
description="Time from which to start viewing key spend",
|
||||
),
|
||||
end_date: Optional[str] = fastapi.Query(
|
||||
default=None,
|
||||
description="Time till which to view key spend",
|
||||
),
|
||||
):
|
||||
"""
|
||||
[BETA] This is a beta endpoint. It might change without notice.
|
||||
|
||||
Please give feedback - https://github.com/BerriAI/litellm/issues
|
||||
"""
|
||||
try:
|
||||
if os.getenv("CLICKHOUSE_HOST") is not None:
|
||||
# getting spend logs from clickhouse
|
||||
from litellm.integrations import clickhouse
|
||||
|
||||
return clickhouse.build_daily_metrics()
|
||||
|
||||
# create a response object
|
||||
"""
|
||||
{
|
||||
"date": "2022-01-01",
|
||||
"spend": 0.0,
|
||||
"users": {},
|
||||
"models": {},
|
||||
}
|
||||
"""
|
||||
else:
|
||||
raise Exception(
|
||||
"Clickhouse: Clickhouse host not set. Required for viewing /daily/metrics"
|
||||
)
|
||||
except Exception as e:
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "detail", f"/spend/logs Error({str(e)})"),
|
||||
type="internal_error",
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
|
||||
)
|
||||
elif isinstance(e, ProxyException):
|
||||
raise e
|
||||
raise ProxyException(
|
||||
message="/spend/logs Error" + str(e),
|
||||
type="internal_error",
|
||||
param=getattr(e, "param", "None"),
|
||||
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
)
|
||||
|
||||
|
||||
#### USER MANAGEMENT ####
|
||||
@router.post(
|
||||
"/user/new",
|
||||
|
|
|
@ -489,18 +489,20 @@ class PrismaClient:
|
|||
)
|
||||
async def check_view_exists(self):
|
||||
"""
|
||||
Checks if the LiteLLM_VerificationTokenView exists in the user's db.
|
||||
Checks if the LiteLLM_VerificationTokenView and MonthlyGlobalSpend exists in the user's db.
|
||||
|
||||
This is used for getting the token + team data in user_api_key_auth
|
||||
LiteLLM_VerificationTokenView: This view is used for getting the token + team data in user_api_key_auth
|
||||
|
||||
MonthlyGlobalSpend: This view is used for the admin view to see global spend for this month
|
||||
|
||||
If the view doesn't exist, one will be created.
|
||||
"""
|
||||
try:
|
||||
# Try to select one row from the view
|
||||
await self.db.execute_raw(
|
||||
await self.db.query_raw(
|
||||
"""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1"""
|
||||
)
|
||||
return "LiteLLM_VerificationTokenView Exists!"
|
||||
print("LiteLLM_VerificationTokenView Exists!") # noqa
|
||||
except Exception as e:
|
||||
# If an error occurs, the view does not exist, so create it
|
||||
value = await self.health_check()
|
||||
|
@ -518,7 +520,29 @@ class PrismaClient:
|
|||
"""
|
||||
)
|
||||
|
||||
return "LiteLLM_VerificationTokenView Created!"
|
||||
print("LiteLLM_VerificationTokenView Created!") # noqa
|
||||
|
||||
try:
|
||||
await self.db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
|
||||
print("MonthlyGlobalSpend Exists!") # noqa
|
||||
except Exception as e:
|
||||
sql_query = """
|
||||
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
|
||||
SELECT
|
||||
DATE("startTime") AS date,
|
||||
SUM("spend") AS spend
|
||||
FROM
|
||||
"LiteLLM_SpendLogs"
|
||||
WHERE
|
||||
"startTime" >= (CURRENT_DATE - INTERVAL '30 days')
|
||||
GROUP BY
|
||||
DATE("startTime");
|
||||
"""
|
||||
await self.db.execute_raw(query=sql_query)
|
||||
|
||||
print("MonthlyGlobalSpend Created!") # noqa
|
||||
|
||||
return
|
||||
|
||||
@backoff.on_exception(
|
||||
backoff.expo,
|
||||
|
|
|
@ -1,253 +1,254 @@
|
|||
import sys
|
||||
import os
|
||||
import io, asyncio
|
||||
## @pytest.mark.skip(reason="AWS Suspended Account")
|
||||
# import sys
|
||||
# import os
|
||||
# import io, asyncio
|
||||
|
||||
# import logging
|
||||
# logging.basicConfig(level=logging.DEBUG)
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
# # import logging
|
||||
# # logging.basicConfig(level=logging.DEBUG)
|
||||
# sys.path.insert(0, os.path.abspath("../.."))
|
||||
|
||||
from litellm import completion
|
||||
import litellm
|
||||
# from litellm import completion
|
||||
# import litellm
|
||||
|
||||
litellm.num_retries = 3
|
||||
# litellm.num_retries = 3
|
||||
|
||||
import time, random
|
||||
import pytest
|
||||
# import time, random
|
||||
# import pytest
|
||||
|
||||
|
||||
def test_s3_logging():
|
||||
# all s3 requests need to be in one test function
|
||||
# since we are modifying stdout, and pytests runs tests in parallel
|
||||
# on circle ci - we only test litellm.acompletion()
|
||||
try:
|
||||
# redirect stdout to log_file
|
||||
litellm.cache = litellm.Cache(
|
||||
type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
|
||||
)
|
||||
# def test_s3_logging():
|
||||
# # all s3 requests need to be in one test function
|
||||
# # since we are modifying stdout, and pytests runs tests in parallel
|
||||
# # on circle ci - we only test litellm.acompletion()
|
||||
# try:
|
||||
# # redirect stdout to log_file
|
||||
# litellm.cache = litellm.Cache(
|
||||
# type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
|
||||
# )
|
||||
|
||||
litellm.success_callback = ["s3"]
|
||||
litellm.s3_callback_params = {
|
||||
"s3_bucket_name": "litellm-logs",
|
||||
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
|
||||
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
|
||||
}
|
||||
litellm.set_verbose = True
|
||||
# litellm.success_callback = ["s3"]
|
||||
# litellm.s3_callback_params = {
|
||||
# "s3_bucket_name": "litellm-logs",
|
||||
# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
|
||||
# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
|
||||
# }
|
||||
# litellm.set_verbose = True
|
||||
|
||||
print("Testing async s3 logging")
|
||||
# print("Testing async s3 logging")
|
||||
|
||||
expected_keys = []
|
||||
# expected_keys = []
|
||||
|
||||
import time
|
||||
# import time
|
||||
|
||||
curr_time = str(time.time())
|
||||
# curr_time = str(time.time())
|
||||
|
||||
async def _test():
|
||||
return await litellm.acompletion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
||||
max_tokens=10,
|
||||
temperature=0.7,
|
||||
user="ishaan-2",
|
||||
)
|
||||
# async def _test():
|
||||
# return await litellm.acompletion(
|
||||
# model="gpt-3.5-turbo",
|
||||
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
||||
# max_tokens=10,
|
||||
# temperature=0.7,
|
||||
# user="ishaan-2",
|
||||
# )
|
||||
|
||||
response = asyncio.run(_test())
|
||||
print(f"response: {response}")
|
||||
expected_keys.append(response.id)
|
||||
# response = asyncio.run(_test())
|
||||
# print(f"response: {response}")
|
||||
# expected_keys.append(response.id)
|
||||
|
||||
async def _test():
|
||||
return await litellm.acompletion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
||||
max_tokens=10,
|
||||
temperature=0.7,
|
||||
user="ishaan-2",
|
||||
)
|
||||
# async def _test():
|
||||
# return await litellm.acompletion(
|
||||
# model="gpt-3.5-turbo",
|
||||
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
||||
# max_tokens=10,
|
||||
# temperature=0.7,
|
||||
# user="ishaan-2",
|
||||
# )
|
||||
|
||||
response = asyncio.run(_test())
|
||||
expected_keys.append(response.id)
|
||||
print(f"response: {response}")
|
||||
time.sleep(5) # wait 5s for logs to land
|
||||
# response = asyncio.run(_test())
|
||||
# expected_keys.append(response.id)
|
||||
# print(f"response: {response}")
|
||||
# time.sleep(5) # wait 5s for logs to land
|
||||
|
||||
import boto3
|
||||
# import boto3
|
||||
|
||||
s3 = boto3.client("s3")
|
||||
bucket_name = "litellm-logs"
|
||||
# List objects in the bucket
|
||||
response = s3.list_objects(Bucket=bucket_name)
|
||||
# s3 = boto3.client("s3")
|
||||
# bucket_name = "litellm-logs"
|
||||
# # List objects in the bucket
|
||||
# response = s3.list_objects(Bucket=bucket_name)
|
||||
|
||||
# Sort the objects based on the LastModified timestamp
|
||||
objects = sorted(
|
||||
response["Contents"], key=lambda x: x["LastModified"], reverse=True
|
||||
)
|
||||
# Get the keys of the most recent objects
|
||||
most_recent_keys = [obj["Key"] for obj in objects]
|
||||
print(most_recent_keys)
|
||||
# for each key, get the part before "-" as the key. Do it safely
|
||||
cleaned_keys = []
|
||||
for key in most_recent_keys:
|
||||
split_key = key.split("_")
|
||||
if len(split_key) < 2:
|
||||
continue
|
||||
cleaned_keys.append(split_key[1])
|
||||
print("\n most recent keys", most_recent_keys)
|
||||
print("\n cleaned keys", cleaned_keys)
|
||||
print("\n Expected keys: ", expected_keys)
|
||||
matches = 0
|
||||
for key in expected_keys:
|
||||
key += ".json"
|
||||
assert key in cleaned_keys
|
||||
# # Sort the objects based on the LastModified timestamp
|
||||
# objects = sorted(
|
||||
# response["Contents"], key=lambda x: x["LastModified"], reverse=True
|
||||
# )
|
||||
# # Get the keys of the most recent objects
|
||||
# most_recent_keys = [obj["Key"] for obj in objects]
|
||||
# print(most_recent_keys)
|
||||
# # for each key, get the part before "-" as the key. Do it safely
|
||||
# cleaned_keys = []
|
||||
# for key in most_recent_keys:
|
||||
# split_key = key.split("_")
|
||||
# if len(split_key) < 2:
|
||||
# continue
|
||||
# cleaned_keys.append(split_key[1])
|
||||
# print("\n most recent keys", most_recent_keys)
|
||||
# print("\n cleaned keys", cleaned_keys)
|
||||
# print("\n Expected keys: ", expected_keys)
|
||||
# matches = 0
|
||||
# for key in expected_keys:
|
||||
# key += ".json"
|
||||
# assert key in cleaned_keys
|
||||
|
||||
if key in cleaned_keys:
|
||||
matches += 1
|
||||
# remove the match key
|
||||
cleaned_keys.remove(key)
|
||||
# this asserts we log, the first request + the 2nd cached request
|
||||
print("we had two matches ! passed ", matches)
|
||||
assert matches == 2
|
||||
try:
|
||||
# cleanup s3 bucket in test
|
||||
for key in most_recent_keys:
|
||||
s3.delete_object(Bucket=bucket_name, Key=key)
|
||||
except:
|
||||
# don't let cleanup fail a test
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred - {e}")
|
||||
finally:
|
||||
# post, close log file and verify
|
||||
# Reset stdout to the original value
|
||||
print("Passed! Testing async s3 logging")
|
||||
# if key in cleaned_keys:
|
||||
# matches += 1
|
||||
# # remove the match key
|
||||
# cleaned_keys.remove(key)
|
||||
# # this asserts we log, the first request + the 2nd cached request
|
||||
# print("we had two matches ! passed ", matches)
|
||||
# assert matches == 2
|
||||
# try:
|
||||
# # cleanup s3 bucket in test
|
||||
# for key in most_recent_keys:
|
||||
# s3.delete_object(Bucket=bucket_name, Key=key)
|
||||
# except:
|
||||
# # don't let cleanup fail a test
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"An exception occurred - {e}")
|
||||
# finally:
|
||||
# # post, close log file and verify
|
||||
# # Reset stdout to the original value
|
||||
# print("Passed! Testing async s3 logging")
|
||||
|
||||
|
||||
# test_s3_logging()
|
||||
# # test_s3_logging()
|
||||
|
||||
|
||||
def test_s3_logging_async():
|
||||
# this tests time added to make s3 logging calls, vs just acompletion calls
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
# Make 5 calls with an empty success_callback
|
||||
litellm.success_callback = []
|
||||
start_time_empty_callback = asyncio.run(make_async_calls())
|
||||
print("done with no callback test")
|
||||
# def test_s3_logging_async():
|
||||
# # this tests time added to make s3 logging calls, vs just acompletion calls
|
||||
# try:
|
||||
# litellm.set_verbose = True
|
||||
# # Make 5 calls with an empty success_callback
|
||||
# litellm.success_callback = []
|
||||
# start_time_empty_callback = asyncio.run(make_async_calls())
|
||||
# print("done with no callback test")
|
||||
|
||||
print("starting s3 logging load test")
|
||||
# Make 5 calls with success_callback set to "langfuse"
|
||||
litellm.success_callback = ["s3"]
|
||||
litellm.s3_callback_params = {
|
||||
"s3_bucket_name": "litellm-logs",
|
||||
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
|
||||
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
|
||||
}
|
||||
start_time_s3 = asyncio.run(make_async_calls())
|
||||
print("done with s3 test")
|
||||
# print("starting s3 logging load test")
|
||||
# # Make 5 calls with success_callback set to "langfuse"
|
||||
# litellm.success_callback = ["s3"]
|
||||
# litellm.s3_callback_params = {
|
||||
# "s3_bucket_name": "litellm-logs",
|
||||
# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
|
||||
# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
|
||||
# }
|
||||
# start_time_s3 = asyncio.run(make_async_calls())
|
||||
# print("done with s3 test")
|
||||
|
||||
# Compare the time for both scenarios
|
||||
print(f"Time taken with success_callback='s3': {start_time_s3}")
|
||||
print(f"Time taken with empty success_callback: {start_time_empty_callback}")
|
||||
# # Compare the time for both scenarios
|
||||
# print(f"Time taken with success_callback='s3': {start_time_s3}")
|
||||
# print(f"Time taken with empty success_callback: {start_time_empty_callback}")
|
||||
|
||||
# assert the diff is not more than 1 second
|
||||
assert abs(start_time_s3 - start_time_empty_callback) < 1
|
||||
# # assert the diff is not more than 1 second
|
||||
# assert abs(start_time_s3 - start_time_empty_callback) < 1
|
||||
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred - {e}")
|
||||
# except litellm.Timeout as e:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"An exception occurred - {e}")
|
||||
|
||||
|
||||
async def make_async_calls():
|
||||
tasks = []
|
||||
for _ in range(5):
|
||||
task = asyncio.create_task(
|
||||
litellm.acompletion(
|
||||
model="azure/chatgpt-v-2",
|
||||
messages=[{"role": "user", "content": "This is a test"}],
|
||||
max_tokens=5,
|
||||
temperature=0.7,
|
||||
timeout=5,
|
||||
user="langfuse_latency_test_user",
|
||||
mock_response="It's simple to use and easy to get started",
|
||||
)
|
||||
)
|
||||
tasks.append(task)
|
||||
# async def make_async_calls():
|
||||
# tasks = []
|
||||
# for _ in range(5):
|
||||
# task = asyncio.create_task(
|
||||
# litellm.acompletion(
|
||||
# model="azure/chatgpt-v-2",
|
||||
# messages=[{"role": "user", "content": "This is a test"}],
|
||||
# max_tokens=5,
|
||||
# temperature=0.7,
|
||||
# timeout=5,
|
||||
# user="langfuse_latency_test_user",
|
||||
# mock_response="It's simple to use and easy to get started",
|
||||
# )
|
||||
# )
|
||||
# tasks.append(task)
|
||||
|
||||
# Measure the start time before running the tasks
|
||||
start_time = asyncio.get_event_loop().time()
|
||||
# # Measure the start time before running the tasks
|
||||
# start_time = asyncio.get_event_loop().time()
|
||||
|
||||
# Wait for all tasks to complete
|
||||
responses = await asyncio.gather(*tasks)
|
||||
# # Wait for all tasks to complete
|
||||
# responses = await asyncio.gather(*tasks)
|
||||
|
||||
# Print the responses when tasks return
|
||||
for idx, response in enumerate(responses):
|
||||
print(f"Response from Task {idx + 1}: {response}")
|
||||
# # Print the responses when tasks return
|
||||
# for idx, response in enumerate(responses):
|
||||
# print(f"Response from Task {idx + 1}: {response}")
|
||||
|
||||
# Calculate the total time taken
|
||||
total_time = asyncio.get_event_loop().time() - start_time
|
||||
# # Calculate the total time taken
|
||||
# total_time = asyncio.get_event_loop().time() - start_time
|
||||
|
||||
return total_time
|
||||
# return total_time
|
||||
|
||||
|
||||
def test_s3_logging_r2():
|
||||
# all s3 requests need to be in one test function
|
||||
# since we are modifying stdout, and pytests runs tests in parallel
|
||||
# on circle ci - we only test litellm.acompletion()
|
||||
try:
|
||||
# redirect stdout to log_file
|
||||
# litellm.cache = litellm.Cache(
|
||||
# type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
|
||||
# )
|
||||
litellm.set_verbose = True
|
||||
from litellm._logging import verbose_logger
|
||||
import logging
|
||||
# def test_s3_logging_r2():
|
||||
# # all s3 requests need to be in one test function
|
||||
# # since we are modifying stdout, and pytests runs tests in parallel
|
||||
# # on circle ci - we only test litellm.acompletion()
|
||||
# try:
|
||||
# # redirect stdout to log_file
|
||||
# # litellm.cache = litellm.Cache(
|
||||
# # type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
|
||||
# # )
|
||||
# litellm.set_verbose = True
|
||||
# from litellm._logging import verbose_logger
|
||||
# import logging
|
||||
|
||||
verbose_logger.setLevel(level=logging.DEBUG)
|
||||
# verbose_logger.setLevel(level=logging.DEBUG)
|
||||
|
||||
litellm.success_callback = ["s3"]
|
||||
litellm.s3_callback_params = {
|
||||
"s3_bucket_name": "litellm-r2-bucket",
|
||||
"s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
|
||||
"s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
|
||||
"s3_endpoint_url": "os.environ/R2_S3_URL",
|
||||
"s3_region_name": "os.environ/R2_S3_REGION_NAME",
|
||||
}
|
||||
print("Testing async s3 logging")
|
||||
# litellm.success_callback = ["s3"]
|
||||
# litellm.s3_callback_params = {
|
||||
# "s3_bucket_name": "litellm-r2-bucket",
|
||||
# "s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
|
||||
# "s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
|
||||
# "s3_endpoint_url": "os.environ/R2_S3_URL",
|
||||
# "s3_region_name": "os.environ/R2_S3_REGION_NAME",
|
||||
# }
|
||||
# print("Testing async s3 logging")
|
||||
|
||||
expected_keys = []
|
||||
# expected_keys = []
|
||||
|
||||
import time
|
||||
# import time
|
||||
|
||||
curr_time = str(time.time())
|
||||
# curr_time = str(time.time())
|
||||
|
||||
async def _test():
|
||||
return await litellm.acompletion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
||||
max_tokens=10,
|
||||
temperature=0.7,
|
||||
user="ishaan-2",
|
||||
)
|
||||
# async def _test():
|
||||
# return await litellm.acompletion(
|
||||
# model="gpt-3.5-turbo",
|
||||
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
||||
# max_tokens=10,
|
||||
# temperature=0.7,
|
||||
# user="ishaan-2",
|
||||
# )
|
||||
|
||||
response = asyncio.run(_test())
|
||||
print(f"response: {response}")
|
||||
expected_keys.append(response.id)
|
||||
# response = asyncio.run(_test())
|
||||
# print(f"response: {response}")
|
||||
# expected_keys.append(response.id)
|
||||
|
||||
import boto3
|
||||
# import boto3
|
||||
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=os.getenv("R2_S3_URL"),
|
||||
region_name=os.getenv("R2_S3_REGION_NAME"),
|
||||
aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
|
||||
aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
|
||||
)
|
||||
# s3 = boto3.client(
|
||||
# "s3",
|
||||
# endpoint_url=os.getenv("R2_S3_URL"),
|
||||
# region_name=os.getenv("R2_S3_REGION_NAME"),
|
||||
# aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
|
||||
# aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
|
||||
# )
|
||||
|
||||
bucket_name = "litellm-r2-bucket"
|
||||
# List objects in the bucket
|
||||
response = s3.list_objects(Bucket=bucket_name)
|
||||
# bucket_name = "litellm-r2-bucket"
|
||||
# # List objects in the bucket
|
||||
# response = s3.list_objects(Bucket=bucket_name)
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred - {e}")
|
||||
finally:
|
||||
# post, close log file and verify
|
||||
# Reset stdout to the original value
|
||||
print("Passed! Testing async s3 logging")
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"An exception occurred - {e}")
|
||||
# finally:
|
||||
# # post, close log file and verify
|
||||
# # Reset stdout to the original value
|
||||
# print("Passed! Testing async s3 logging")
|
||||
|
|
|
@ -130,6 +130,8 @@ def test_vertex_ai():
|
|||
f"response.choices[0].finish_reason: {response.choices[0].finish_reason}"
|
||||
)
|
||||
assert response.choices[0].finish_reason in litellm._openai_finish_reasons
|
||||
except litellm.RateLimitError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
@ -183,6 +185,8 @@ def test_vertex_ai_stream():
|
|||
assert type(content) == str
|
||||
# pass
|
||||
assert len(completed_str) > 4
|
||||
except litellm.RateLimitError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
|
|
@ -193,16 +193,26 @@ async def test_hf_completion_tgi():
|
|||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
except litellm.APIError as e:
|
||||
print("got an api error")
|
||||
pass
|
||||
except litellm.Timeout as e:
|
||||
print("got a timeout error")
|
||||
pass
|
||||
except litellm.RateLimitError as e:
|
||||
# this will catch the model is overloaded error
|
||||
print("got a rate limit error")
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
if "Model is overloaded" in str(e):
|
||||
pass
|
||||
else:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_get_cloudflare_response_streaming()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_completion_sagemaker():
|
||||
# litellm.set_verbose=True
|
||||
|
|
|
@ -1,257 +1,259 @@
|
|||
import sys, os
|
||||
import traceback
|
||||
from dotenv import load_dotenv
|
||||
# @pytest.mark.skip(reason="AWS Suspended Account")
|
||||
# import sys, os
|
||||
# import traceback
|
||||
# from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
import os, io
|
||||
# load_dotenv()
|
||||
# import os, io
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import pytest
|
||||
import litellm
|
||||
from litellm import embedding, completion, completion_cost, Timeout
|
||||
from litellm import RateLimitError
|
||||
# sys.path.insert(
|
||||
# 0, os.path.abspath("../..")
|
||||
# ) # Adds the parent directory to the system path
|
||||
# import pytest
|
||||
# import litellm
|
||||
# from litellm import embedding, completion, completion_cost, Timeout
|
||||
# from litellm import RateLimitError
|
||||
|
||||
# litellm.num_retries = 3
|
||||
litellm.cache = None
|
||||
litellm.success_callback = []
|
||||
user_message = "Write a short poem about the sky"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
# # litellm.num_retries = 3
|
||||
# litellm.cache = None
|
||||
# litellm.success_callback = []
|
||||
# user_message = "Write a short poem about the sky"
|
||||
# messages = [{"content": user_message, "role": "user"}]
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_callbacks():
|
||||
print("\npytest fixture - resetting callbacks")
|
||||
litellm.success_callback = []
|
||||
litellm._async_success_callback = []
|
||||
litellm.failure_callback = []
|
||||
litellm.callbacks = []
|
||||
# @pytest.fixture(autouse=True)
|
||||
# def reset_callbacks():
|
||||
# print("\npytest fixture - resetting callbacks")
|
||||
# litellm.success_callback = []
|
||||
# litellm._async_success_callback = []
|
||||
# litellm.failure_callback = []
|
||||
# litellm.callbacks = []
|
||||
|
||||
|
||||
def test_completion_bedrock_claude_completion_auth():
|
||||
print("calling bedrock claude completion params auth")
|
||||
import os
|
||||
# def test_completion_bedrock_claude_completion_auth():
|
||||
# print("calling bedrock claude completion params auth")
|
||||
# import os
|
||||
|
||||
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
|
||||
os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||
os.environ.pop("AWS_REGION_NAME", None)
|
||||
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||
# os.environ.pop("AWS_REGION_NAME", None)
|
||||
|
||||
try:
|
||||
response = completion(
|
||||
model="bedrock/anthropic.claude-instant-v1",
|
||||
messages=messages,
|
||||
max_tokens=10,
|
||||
temperature=0.1,
|
||||
aws_access_key_id=aws_access_key_id,
|
||||
aws_secret_access_key=aws_secret_access_key,
|
||||
aws_region_name=aws_region_name,
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
# try:
|
||||
# response = completion(
|
||||
# model="bedrock/anthropic.claude-instant-v1",
|
||||
# messages=messages,
|
||||
# max_tokens=10,
|
||||
# temperature=0.1,
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# aws_region_name=aws_region_name,
|
||||
# )
|
||||
# # Add any assertions here to check the response
|
||||
# print(response)
|
||||
|
||||
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||
os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
except RateLimitError:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||
# os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
# except RateLimitError:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_completion_bedrock_claude_completion_auth()
|
||||
# # test_completion_bedrock_claude_completion_auth()
|
||||
|
||||
|
||||
def test_completion_bedrock_claude_2_1_completion_auth():
|
||||
print("calling bedrock claude 2.1 completion params auth")
|
||||
import os
|
||||
# def test_completion_bedrock_claude_2_1_completion_auth():
|
||||
# print("calling bedrock claude 2.1 completion params auth")
|
||||
# import os
|
||||
|
||||
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
|
||||
os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||
os.environ.pop("AWS_REGION_NAME", None)
|
||||
try:
|
||||
response = completion(
|
||||
model="bedrock/anthropic.claude-v2:1",
|
||||
messages=messages,
|
||||
max_tokens=10,
|
||||
temperature=0.1,
|
||||
aws_access_key_id=aws_access_key_id,
|
||||
aws_secret_access_key=aws_secret_access_key,
|
||||
aws_region_name=aws_region_name,
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||
# os.environ.pop("AWS_REGION_NAME", None)
|
||||
# try:
|
||||
# response = completion(
|
||||
# model="bedrock/anthropic.claude-v2:1",
|
||||
# messages=messages,
|
||||
# max_tokens=10,
|
||||
# temperature=0.1,
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# aws_region_name=aws_region_name,
|
||||
# )
|
||||
# # Add any assertions here to check the response
|
||||
# print(response)
|
||||
|
||||
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||
os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
except RateLimitError:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||
# os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
# except RateLimitError:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_completion_bedrock_claude_2_1_completion_auth()
|
||||
# # test_completion_bedrock_claude_2_1_completion_auth()
|
||||
|
||||
|
||||
def test_completion_bedrock_claude_external_client_auth():
|
||||
print("\ncalling bedrock claude external client auth")
|
||||
import os
|
||||
# def test_completion_bedrock_claude_external_client_auth():
|
||||
# print("\ncalling bedrock claude external client auth")
|
||||
# import os
|
||||
|
||||
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
|
||||
os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||
os.environ.pop("AWS_REGION_NAME", None)
|
||||
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||
# os.environ.pop("AWS_REGION_NAME", None)
|
||||
|
||||
try:
|
||||
import boto3
|
||||
# try:
|
||||
# import boto3
|
||||
|
||||
litellm.set_verbose = True
|
||||
# litellm.set_verbose = True
|
||||
|
||||
bedrock = boto3.client(
|
||||
service_name="bedrock-runtime",
|
||||
region_name=aws_region_name,
|
||||
aws_access_key_id=aws_access_key_id,
|
||||
aws_secret_access_key=aws_secret_access_key,
|
||||
endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
|
||||
)
|
||||
# bedrock = boto3.client(
|
||||
# service_name="bedrock-runtime",
|
||||
# region_name=aws_region_name,
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
|
||||
# )
|
||||
|
||||
response = completion(
|
||||
model="bedrock/anthropic.claude-instant-v1",
|
||||
messages=messages,
|
||||
max_tokens=10,
|
||||
temperature=0.1,
|
||||
aws_bedrock_client=bedrock,
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
# response = completion(
|
||||
# model="bedrock/anthropic.claude-instant-v1",
|
||||
# messages=messages,
|
||||
# max_tokens=10,
|
||||
# temperature=0.1,
|
||||
# aws_bedrock_client=bedrock,
|
||||
# )
|
||||
# # Add any assertions here to check the response
|
||||
# print(response)
|
||||
|
||||
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||
os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
except RateLimitError:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||
# os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
# except RateLimitError:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_completion_bedrock_claude_external_client_auth()
|
||||
# # test_completion_bedrock_claude_external_client_auth()
|
||||
|
||||
|
||||
def test_completion_bedrock_claude_sts_client_auth():
|
||||
print("\ncalling bedrock claude external client auth")
|
||||
import os
|
||||
# @pytest.mark.skip(reason="Expired token, need to renew")
|
||||
# def test_completion_bedrock_claude_sts_client_auth():
|
||||
# print("\ncalling bedrock claude external client auth")
|
||||
# import os
|
||||
|
||||
aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
|
||||
aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
|
||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
||||
# aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
|
||||
# aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
|
||||
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
||||
|
||||
try:
|
||||
import boto3
|
||||
# try:
|
||||
# import boto3
|
||||
|
||||
litellm.set_verbose = True
|
||||
# litellm.set_verbose = True
|
||||
|
||||
response = completion(
|
||||
model="bedrock/anthropic.claude-instant-v1",
|
||||
messages=messages,
|
||||
max_tokens=10,
|
||||
temperature=0.1,
|
||||
aws_region_name=aws_region_name,
|
||||
aws_access_key_id=aws_access_key_id,
|
||||
aws_secret_access_key=aws_secret_access_key,
|
||||
aws_role_name=aws_role_name,
|
||||
aws_session_name="my-test-session",
|
||||
)
|
||||
# response = completion(
|
||||
# model="bedrock/anthropic.claude-instant-v1",
|
||||
# messages=messages,
|
||||
# max_tokens=10,
|
||||
# temperature=0.1,
|
||||
# aws_region_name=aws_region_name,
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# aws_role_name=aws_role_name,
|
||||
# aws_session_name="my-test-session",
|
||||
# )
|
||||
|
||||
response = embedding(
|
||||
model="cohere.embed-multilingual-v3",
|
||||
input=["hello world"],
|
||||
aws_region_name="us-east-1",
|
||||
aws_access_key_id=aws_access_key_id,
|
||||
aws_secret_access_key=aws_secret_access_key,
|
||||
aws_role_name=aws_role_name,
|
||||
aws_session_name="my-test-session",
|
||||
)
|
||||
# response = embedding(
|
||||
# model="cohere.embed-multilingual-v3",
|
||||
# input=["hello world"],
|
||||
# aws_region_name="us-east-1",
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# aws_role_name=aws_role_name,
|
||||
# aws_session_name="my-test-session",
|
||||
# )
|
||||
|
||||
response = completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=messages,
|
||||
aws_region_name="us-east-1",
|
||||
aws_access_key_id=aws_access_key_id,
|
||||
aws_secret_access_key=aws_secret_access_key,
|
||||
aws_role_name=aws_role_name,
|
||||
aws_session_name="my-test-session",
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
except RateLimitError:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
# response = completion(
|
||||
# model="gpt-3.5-turbo",
|
||||
# messages=messages,
|
||||
# aws_region_name="us-east-1",
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# aws_role_name=aws_role_name,
|
||||
# aws_session_name="my-test-session",
|
||||
# )
|
||||
# # Add any assertions here to check the response
|
||||
# print(response)
|
||||
# except RateLimitError:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
test_completion_bedrock_claude_sts_client_auth()
|
||||
# # test_completion_bedrock_claude_sts_client_auth()
|
||||
|
||||
|
||||
def test_provisioned_throughput():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
import botocore, json, io
|
||||
import botocore.session
|
||||
from botocore.stub import Stubber
|
||||
# def test_provisioned_throughput():
|
||||
# try:
|
||||
# litellm.set_verbose = True
|
||||
# import botocore, json, io
|
||||
# import botocore.session
|
||||
# from botocore.stub import Stubber
|
||||
|
||||
bedrock_client = botocore.session.get_session().create_client(
|
||||
"bedrock-runtime", region_name="us-east-1"
|
||||
)
|
||||
# bedrock_client = botocore.session.get_session().create_client(
|
||||
# "bedrock-runtime", region_name="us-east-1"
|
||||
# )
|
||||
|
||||
expected_params = {
|
||||
"accept": "application/json",
|
||||
"body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
|
||||
'"max_tokens_to_sample": 256}',
|
||||
"contentType": "application/json",
|
||||
"modelId": "provisioned-model-arn",
|
||||
}
|
||||
response_from_bedrock = {
|
||||
"body": io.StringIO(
|
||||
json.dumps(
|
||||
{
|
||||
"completion": " Here is a short poem about the sky:",
|
||||
"stop_reason": "max_tokens",
|
||||
"stop": None,
|
||||
}
|
||||
)
|
||||
),
|
||||
"contentType": "contentType",
|
||||
"ResponseMetadata": {"HTTPStatusCode": 200},
|
||||
}
|
||||
# expected_params = {
|
||||
# "accept": "application/json",
|
||||
# "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
|
||||
# '"max_tokens_to_sample": 256}',
|
||||
# "contentType": "application/json",
|
||||
# "modelId": "provisioned-model-arn",
|
||||
# }
|
||||
# response_from_bedrock = {
|
||||
# "body": io.StringIO(
|
||||
# json.dumps(
|
||||
# {
|
||||
# "completion": " Here is a short poem about the sky:",
|
||||
# "stop_reason": "max_tokens",
|
||||
# "stop": None,
|
||||
# }
|
||||
# )
|
||||
# ),
|
||||
# "contentType": "contentType",
|
||||
# "ResponseMetadata": {"HTTPStatusCode": 200},
|
||||
# }
|
||||
|
||||
with Stubber(bedrock_client) as stubber:
|
||||
stubber.add_response(
|
||||
"invoke_model",
|
||||
service_response=response_from_bedrock,
|
||||
expected_params=expected_params,
|
||||
)
|
||||
response = litellm.completion(
|
||||
model="bedrock/anthropic.claude-instant-v1",
|
||||
model_id="provisioned-model-arn",
|
||||
messages=[{"content": "Hello, how are you?", "role": "user"}],
|
||||
aws_bedrock_client=bedrock_client,
|
||||
)
|
||||
print("response stubbed", response)
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
# with Stubber(bedrock_client) as stubber:
|
||||
# stubber.add_response(
|
||||
# "invoke_model",
|
||||
# service_response=response_from_bedrock,
|
||||
# expected_params=expected_params,
|
||||
# )
|
||||
# response = litellm.completion(
|
||||
# model="bedrock/anthropic.claude-instant-v1",
|
||||
# model_id="provisioned-model-arn",
|
||||
# messages=[{"content": "Hello, how are you?", "role": "user"}],
|
||||
# aws_bedrock_client=bedrock_client,
|
||||
# )
|
||||
# print("response stubbed", response)
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_provisioned_throughput()
|
||||
# # test_provisioned_throughput()
|
||||
|
|
|
@ -546,6 +546,7 @@ def test_redis_cache_acompletion_stream():
|
|||
# test_redis_cache_acompletion_stream()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_redis_cache_acompletion_stream_bedrock():
|
||||
import asyncio
|
||||
|
||||
|
@ -571,7 +572,7 @@ def test_redis_cache_acompletion_stream_bedrock():
|
|||
async def call1():
|
||||
nonlocal response_1_content
|
||||
response1 = await litellm.acompletion(
|
||||
model="bedrock/anthropic.claude-v1",
|
||||
model="bedrock/anthropic.claude-v2",
|
||||
messages=messages,
|
||||
max_tokens=40,
|
||||
temperature=1,
|
||||
|
@ -589,7 +590,7 @@ def test_redis_cache_acompletion_stream_bedrock():
|
|||
async def call2():
|
||||
nonlocal response_2_content
|
||||
response2 = await litellm.acompletion(
|
||||
model="bedrock/anthropic.claude-v1",
|
||||
model="bedrock/anthropic.claude-v2",
|
||||
messages=messages,
|
||||
max_tokens=40,
|
||||
temperature=1,
|
||||
|
@ -615,6 +616,7 @@ def test_redis_cache_acompletion_stream_bedrock():
|
|||
raise e
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_s3_cache_acompletion_stream_azure():
|
||||
import asyncio
|
||||
|
||||
|
@ -697,6 +699,7 @@ def test_s3_cache_acompletion_stream_azure():
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
async def test_s3_cache_acompletion_azure():
|
||||
import asyncio
|
||||
import logging
|
||||
|
|
|
@ -1404,6 +1404,7 @@ def test_customprompt_together_ai():
|
|||
# test_customprompt_together_ai()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_sagemaker():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
@ -1429,6 +1430,7 @@ def test_completion_sagemaker():
|
|||
# test_completion_sagemaker()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_sagemaker_stream():
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
|
@ -1459,6 +1461,7 @@ def test_completion_sagemaker_stream():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_chat_sagemaker():
|
||||
try:
|
||||
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||
|
@ -1483,6 +1486,7 @@ def test_completion_chat_sagemaker():
|
|||
# test_completion_chat_sagemaker()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_chat_sagemaker_mistral():
|
||||
try:
|
||||
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||
|
@ -1501,6 +1505,7 @@ def test_completion_chat_sagemaker_mistral():
|
|||
# test_completion_chat_sagemaker_mistral()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_titan_null_response():
|
||||
try:
|
||||
response = completion(
|
||||
|
@ -1526,6 +1531,7 @@ def test_completion_bedrock_titan_null_response():
|
|||
pytest.fail(f"An error occurred - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_titan():
|
||||
try:
|
||||
response = completion(
|
||||
|
@ -1547,6 +1553,7 @@ def test_completion_bedrock_titan():
|
|||
# test_completion_bedrock_titan()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_claude():
|
||||
print("calling claude")
|
||||
try:
|
||||
|
@ -1568,6 +1575,7 @@ def test_completion_bedrock_claude():
|
|||
# test_completion_bedrock_claude()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_cohere():
|
||||
print("calling bedrock cohere")
|
||||
litellm.set_verbose = True
|
||||
|
@ -1954,12 +1962,15 @@ def test_completion_gemini():
|
|||
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||
try:
|
||||
response = completion(model=model_name, messages=messages)
|
||||
# Add any assertions here to check the response
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
except litellm.APIError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
if "InternalServerError" in str(e):
|
||||
pass
|
||||
else:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_completion_gemini()
|
||||
|
@ -1974,8 +1985,13 @@ async def test_acompletion_gemini():
|
|||
response = await litellm.acompletion(model=model_name, messages=messages)
|
||||
# Add any assertions here to check the response
|
||||
print(f"response: {response}")
|
||||
except litellm.APIError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
if "InternalServerError" in str(e):
|
||||
pass
|
||||
else:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# Palm tests
|
||||
|
|
|
@ -171,6 +171,7 @@ def test_cost_openai_image_gen():
|
|||
assert cost == 0.019922944
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_cost_bedrock_pricing():
|
||||
"""
|
||||
- get pricing specific to region for a model
|
||||
|
@ -226,6 +227,7 @@ def test_cost_bedrock_pricing():
|
|||
assert cost == predicted_cost
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS disabled our access")
|
||||
def test_cost_bedrock_pricing_actual_calls():
|
||||
litellm.set_verbose = True
|
||||
model = "anthropic.claude-instant-v1"
|
||||
|
|
|
@ -80,16 +80,6 @@ model_list:
|
|||
description: this is a test openai model
|
||||
id: 9b1ef341-322c-410a-8992-903987fef439
|
||||
model_name: test_openai_models
|
||||
- litellm_params:
|
||||
model: bedrock/amazon.titan-embed-text-v1
|
||||
model_info:
|
||||
mode: embedding
|
||||
model_name: amazon-embeddings
|
||||
- litellm_params:
|
||||
model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
|
||||
model_info:
|
||||
mode: embedding
|
||||
model_name: GPT-J 6B - Sagemaker Text Embedding (Internal)
|
||||
- litellm_params:
|
||||
model: dall-e-3
|
||||
model_info:
|
||||
|
|
|
@ -478,17 +478,18 @@ async def test_async_chat_azure_stream():
|
|||
|
||||
|
||||
## Test Bedrock + sync
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_chat_bedrock_stream():
|
||||
try:
|
||||
customHandler = CompletionCustomHandler()
|
||||
litellm.callbacks = [customHandler]
|
||||
response = litellm.completion(
|
||||
model="bedrock/anthropic.claude-v1",
|
||||
model="bedrock/anthropic.claude-v2",
|
||||
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
|
||||
)
|
||||
# test streaming
|
||||
response = litellm.completion(
|
||||
model="bedrock/anthropic.claude-v1",
|
||||
model="bedrock/anthropic.claude-v2",
|
||||
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
|
||||
stream=True,
|
||||
)
|
||||
|
@ -497,7 +498,7 @@ def test_chat_bedrock_stream():
|
|||
# test failure callback
|
||||
try:
|
||||
response = litellm.completion(
|
||||
model="bedrock/anthropic.claude-v1",
|
||||
model="bedrock/anthropic.claude-v2",
|
||||
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
|
||||
aws_region_name="my-bad-region",
|
||||
stream=True,
|
||||
|
@ -518,18 +519,19 @@ def test_chat_bedrock_stream():
|
|||
|
||||
|
||||
## Test Bedrock + Async
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_chat_bedrock_stream():
|
||||
try:
|
||||
customHandler = CompletionCustomHandler()
|
||||
litellm.callbacks = [customHandler]
|
||||
response = await litellm.acompletion(
|
||||
model="bedrock/anthropic.claude-v1",
|
||||
model="bedrock/anthropic.claude-v2",
|
||||
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
|
||||
)
|
||||
# test streaming
|
||||
response = await litellm.acompletion(
|
||||
model="bedrock/anthropic.claude-v1",
|
||||
model="bedrock/anthropic.claude-v2",
|
||||
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
|
||||
stream=True,
|
||||
)
|
||||
|
@ -540,7 +542,7 @@ async def test_async_chat_bedrock_stream():
|
|||
## test failure callback
|
||||
try:
|
||||
response = await litellm.acompletion(
|
||||
model="bedrock/anthropic.claude-v1",
|
||||
model="bedrock/anthropic.claude-v2",
|
||||
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
|
||||
aws_region_name="my-bad-key",
|
||||
stream=True,
|
||||
|
@ -561,6 +563,7 @@ async def test_async_chat_bedrock_stream():
|
|||
|
||||
|
||||
## Test Sagemaker + Async
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_chat_sagemaker_stream():
|
||||
try:
|
||||
|
@ -793,6 +796,7 @@ async def test_async_embedding_azure():
|
|||
|
||||
|
||||
## Test Bedrock + Async
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_embedding_bedrock():
|
||||
try:
|
||||
|
|
|
@ -388,6 +388,7 @@ async def test_async_custom_handler_embedding_optional_param():
|
|||
# asyncio.run(test_async_custom_handler_embedding_optional_param())
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Account suspended. Pending their approval")
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_custom_handler_embedding_optional_param_bedrock():
|
||||
"""
|
||||
|
|
|
@ -67,6 +67,7 @@ def verify_log_file(log_file_path):
|
|||
assert success_count == 3 # Expect 3 success logs from dynamoDB
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_dynamo_logging():
|
||||
# all dynamodb requests need to be in one test function
|
||||
# since we are modifying stdout, and pytests runs tests in parallel
|
||||
|
|
|
@ -256,6 +256,7 @@ async def test_vertexai_aembedding():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_bedrock_embedding_titan():
|
||||
try:
|
||||
# this tests if we support str input for bedrock embedding
|
||||
|
@ -301,6 +302,7 @@ def test_bedrock_embedding_titan():
|
|||
# test_bedrock_embedding_titan()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_bedrock_embedding_cohere():
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
|
@ -422,6 +424,7 @@ def test_aembedding_azure():
|
|||
# test_aembedding_azure()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_sagemaker_embeddings():
|
||||
try:
|
||||
response = litellm.embedding(
|
||||
|
@ -438,6 +441,7 @@ def test_sagemaker_embeddings():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_sagemaker_aembeddings():
|
||||
try:
|
||||
|
|
|
@ -42,6 +42,7 @@ exception_models = [
|
|||
|
||||
|
||||
# Test 1: Context Window Errors
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.parametrize("model", exception_models)
|
||||
def test_context_window(model):
|
||||
print("Testing context window error")
|
||||
|
@ -120,9 +121,9 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
|
|||
os.environ["AI21_API_KEY"] = "bad-key"
|
||||
elif "togethercomputer" in model:
|
||||
temporary_key = os.environ["TOGETHERAI_API_KEY"]
|
||||
os.environ[
|
||||
"TOGETHERAI_API_KEY"
|
||||
] = "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
|
||||
os.environ["TOGETHERAI_API_KEY"] = (
|
||||
"84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
|
||||
)
|
||||
elif model in litellm.openrouter_models:
|
||||
temporary_key = os.environ["OPENROUTER_API_KEY"]
|
||||
os.environ["OPENROUTER_API_KEY"] = "bad-key"
|
||||
|
|
|
@ -87,6 +87,7 @@ async def test_azure_img_gen_health_check():
|
|||
# asyncio.run(test_azure_img_gen_health_check())
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_sagemaker_embedding_health_check():
|
||||
response = await litellm.ahealth_check(
|
||||
|
|
|
@ -121,6 +121,7 @@ async def test_async_image_generation_azure():
|
|||
pytest.fail(f"An exception occurred - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_image_generation_bedrock():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
@ -141,6 +142,7 @@ def test_image_generation_bedrock():
|
|||
pytest.fail(f"An exception occurred - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_aimage_generation_bedrock_with_optional_params():
|
||||
try:
|
||||
|
|
File diff suppressed because it is too large
|
@ -80,6 +80,14 @@ request_data = {
|
|||
|
||||
@pytest.fixture
|
||||
def prisma_client():
|
||||
from litellm.proxy.proxy_cli import append_query_params
|
||||
|
||||
### add connection pool + pool timeout args
|
||||
params = {"connection_limit": 100, "pool_timeout": 60}
|
||||
database_url = os.getenv("DATABASE_URL")
|
||||
modified_url = append_query_params(database_url, params)
|
||||
os.environ["DATABASE_URL"] = modified_url
|
||||
|
||||
# Assuming DBClient is a class that needs to be instantiated
|
||||
prisma_client = PrismaClient(
|
||||
database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
|
||||
|
@ -1633,3 +1641,99 @@ async def test_key_with_no_permissions(prisma_client):
|
|||
except Exception as e:
|
||||
print("Got Exception", e)
|
||||
print(e.message)
|
||||
|
||||
|
||||
async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
|
||||
from litellm import ModelResponse, Choices, Message, Usage
|
||||
from litellm.proxy.proxy_server import (
|
||||
_PROXY_track_cost_callback as track_cost_callback,
|
||||
)
|
||||
|
||||
import uuid
|
||||
|
||||
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
|
||||
resp = ModelResponse(
|
||||
id=request_id,
|
||||
choices=[
|
||||
Choices(
|
||||
finish_reason=None,
|
||||
index=0,
|
||||
message=Message(
|
||||
content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
|
||||
role="assistant",
|
||||
),
|
||||
)
|
||||
],
|
||||
model="gpt-35-turbo", # azure always has model written like this
|
||||
usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
|
||||
)
|
||||
await track_cost_callback(
|
||||
kwargs={
|
||||
"call_type": "acompletion",
|
||||
"model": "sagemaker-chatgpt-v-2",
|
||||
"stream": True,
|
||||
"complete_streaming_response": resp,
|
||||
"litellm_params": {
|
||||
"metadata": {
|
||||
"user_api_key": hash_token(generated_key),
|
||||
"user_api_key_user_id": user_id,
|
||||
}
|
||||
},
|
||||
"response_cost": 0.00005,
|
||||
},
|
||||
completion_response=resp,
|
||||
start_time=datetime.now(),
|
||||
end_time=datetime.now(),
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="High traffic load test for spend tracking")
|
||||
@pytest.mark.asyncio
|
||||
async def test_proxy_load_test_db(prisma_client):
|
||||
"""
|
||||
Run 1500 req./s against track_cost_callback function
|
||||
"""
|
||||
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
|
||||
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
import logging, time
|
||||
|
||||
litellm.set_verbose = True
|
||||
verbose_proxy_logger.setLevel(logging.DEBUG)
|
||||
try:
|
||||
start_time = time.time()
|
||||
await litellm.proxy.proxy_server.prisma_client.connect()
|
||||
request = GenerateKeyRequest(max_budget=0.00001)
|
||||
key = await generate_key_fn(request)
|
||||
print(key)
|
||||
|
||||
generated_key = key.key
|
||||
user_id = key.user_id
|
||||
bearer_token = "Bearer " + generated_key
|
||||
|
||||
request = Request(scope={"type": "http"})
|
||||
request._url = URL(url="/chat/completions")
|
||||
|
||||
# use generated key to auth in
|
||||
result = await user_api_key_auth(request=request, api_key=bearer_token)
|
||||
print("result from user auth with new key", result)
|
||||
# update spend using track_cost callback, make 2nd request, it should fail
|
||||
n = 5000
|
||||
tasks = [
|
||||
track_cost_callback_helper_fn(generated_key=generated_key, user_id=user_id)
|
||||
for _ in range(n)
|
||||
]
|
||||
completions = await asyncio.gather(*tasks)
|
||||
await asyncio.sleep(120)
|
||||
try:
|
||||
# call spend logs
|
||||
spend_logs = await view_spend_logs(api_key=generated_key)
|
||||
|
||||
print(f"len responses: {len(spend_logs)}")
|
||||
assert len(spend_logs) == n
|
||||
print(n, time.time() - start_time, len(spend_logs))
|
||||
except:
|
||||
print(n, time.time() - start_time, 0)
|
||||
raise Exception(f"it worked! key={key.key}")
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred - {str(e)}")
|
||||
|
|
|
@ -12,6 +12,7 @@ import litellm
|
|||
from litellm import completion
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_sagemaker():
|
||||
litellm.set_verbose = True
|
||||
litellm.drop_params = True
|
||||
|
|
|
@ -473,6 +473,7 @@ def aleph_alpha_test_completion():
|
|||
# Sagemaker
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def sagemaker_test_completion():
|
||||
litellm.SagemakerConfig(max_new_tokens=10)
|
||||
# litellm.set_verbose=True
|
||||
|
@ -514,6 +515,7 @@ def sagemaker_test_completion():
|
|||
# Bedrock
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def bedrock_test_completion():
|
||||
litellm.AmazonCohereConfig(max_tokens=10)
|
||||
# litellm.set_verbose=True
|
||||
|
|
|
@ -125,6 +125,7 @@ def test_embedding(client_no_auth):
|
|||
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_bedrock_embedding(client_no_auth):
|
||||
global headers
|
||||
from litellm.proxy.proxy_server import user_custom_auth
|
||||
|
@ -145,6 +146,7 @@ def test_bedrock_embedding(client_no_auth):
|
|||
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_sagemaker_embedding(client_no_auth):
|
||||
global headers
|
||||
from litellm.proxy.proxy_server import user_custom_auth
|
||||
|
|
|
@ -61,6 +61,7 @@ def generate_random_word(length=4):
|
|||
return "".join(random.choice(letters) for _ in range(length))
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_chat_completion(client_no_auth):
|
||||
global headers
|
||||
try:
|
||||
|
|
|
@ -166,14 +166,6 @@ def test_call_one_endpoint():
|
|||
"tpm": 240000,
|
||||
"rpm": 1800,
|
||||
},
|
||||
{
|
||||
"model_name": "claude-v1",
|
||||
"litellm_params": {
|
||||
"model": "bedrock/anthropic.claude-instant-v1",
|
||||
},
|
||||
"tpm": 100000,
|
||||
"rpm": 10000,
|
||||
},
|
||||
{
|
||||
"model_name": "text-embedding-ada-002",
|
||||
"litellm_params": {
|
||||
|
@ -202,15 +194,6 @@ def test_call_one_endpoint():
|
|||
)
|
||||
print("\n response", response)
|
||||
|
||||
async def call_bedrock_claude():
|
||||
response = await router.acompletion(
|
||||
model="bedrock/anthropic.claude-instant-v1",
|
||||
messages=[{"role": "user", "content": "hello this request will pass"}],
|
||||
specific_deployment=True,
|
||||
)
|
||||
|
||||
print("\n response", response)
|
||||
|
||||
async def call_azure_embedding():
|
||||
response = await router.aembedding(
|
||||
model="azure/azure-embedding-model",
|
||||
|
@ -221,7 +204,6 @@ def test_call_one_endpoint():
|
|||
print("\n response", response)
|
||||
|
||||
asyncio.run(call_azure_completion())
|
||||
asyncio.run(call_bedrock_claude())
|
||||
asyncio.run(call_azure_embedding())
|
||||
|
||||
os.environ["AZURE_API_BASE"] = old_api_base
|
||||
|
@ -593,6 +575,7 @@ def test_azure_embedding_on_router():
|
|||
# test_azure_embedding_on_router()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_bedrock_on_router():
|
||||
litellm.set_verbose = True
|
||||
print("\n Testing bedrock on router\n")
|
||||
|
|
|
@ -87,6 +87,7 @@ def test_router_timeouts():
|
|||
print("********** TOKENS USED SO FAR = ", total_tokens_used)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_router_timeouts_bedrock():
|
||||
import openai
|
||||
|
|
|
@ -764,6 +764,7 @@ def test_completion_replicate_stream_bad_key():
|
|||
# test_completion_replicate_stream_bad_key()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_claude_stream():
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
|
@ -810,6 +811,7 @@ def test_completion_bedrock_claude_stream():
|
|||
# test_completion_bedrock_claude_stream()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_ai21_stream():
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
|
@ -911,6 +913,7 @@ def test_sagemaker_weird_response():
|
|||
# test_sagemaker_weird_response()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_sagemaker_streaming_async():
|
||||
try:
|
||||
|
@ -949,6 +952,7 @@ async def test_sagemaker_streaming_async():
|
|||
# asyncio.run(test_sagemaker_streaming_async())
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_sagemaker_stream():
|
||||
try:
|
||||
response = completion(
|
||||
|
@ -1075,8 +1079,6 @@ async def test_hf_completion_tgi_stream():
|
|||
if finished:
|
||||
break
|
||||
idx += 1
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except litellm.ServiceUnavailableError as e:
|
||||
pass
|
||||
|
|
|
@ -317,3 +317,24 @@ def test_token_counter():
|
|||
|
||||
|
||||
# test_token_counter()
|
||||
|
||||
|
||||
def test_supports_function_calling():
    try:
        assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
        assert (
            litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
        )
        assert (
            litellm.supports_function_calling(model="anthropic.claude-instant-v1")
            == False
        )
        assert litellm.supports_function_calling(model="palm/chat-bison") == False
        assert litellm.supports_function_calling(model="ollama/llama2") == False
        assert (
            litellm.supports_function_calling(model="anthropic.claude-instant-v1")
            == False
        )
        assert litellm.supports_function_calling(model="claude-2") == False
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
|
||||
|
|
130
litellm/utils.py
|
@ -205,18 +205,18 @@ def map_finish_reason(
|
|||
|
||||
class FunctionCall(OpenAIObject):
|
||||
arguments: str
|
||||
name: str
|
||||
name: Optional[str] = None
|
||||
|
||||
|
||||
class Function(OpenAIObject):
|
||||
arguments: str
|
||||
name: str
|
||||
name: Optional[str] = None
|
||||
|
||||
|
||||
class ChatCompletionDeltaToolCall(OpenAIObject):
|
||||
id: str
|
||||
id: Optional[str] = None
|
||||
function: Function
|
||||
type: str
|
||||
type: Optional[str] = None
|
||||
index: int
|
||||
|
||||
|
||||
|
@ -275,13 +275,19 @@ class Delta(OpenAIObject):
|
|||
super(Delta, self).__init__(**params)
|
||||
self.content = content
|
||||
self.role = role
|
||||
self.function_call = function_call
|
||||
if tool_calls is not None and isinstance(tool_calls, dict):
|
||||
if function_call is not None and isinstance(function_call, dict):
|
||||
self.function_call = FunctionCall(**function_call)
|
||||
else:
|
||||
self.function_call = function_call
|
||||
if tool_calls is not None and isinstance(tool_calls, list):
|
||||
self.tool_calls = []
|
||||
for tool_call in tool_calls:
|
||||
if tool_call.get("index", None) is None:
|
||||
tool_call["index"] = 0
|
||||
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
|
||||
if isinstance(tool_call, dict):
|
||||
if tool_call.get("index", None) is None:
|
||||
tool_call["index"] = 0
|
||||
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
|
||||
elif isinstance(tool_call, ChatCompletionDeltaToolCall):
|
||||
self.tool_calls.append(tool_call)
|
||||
else:
|
||||
self.tool_calls = tool_calls
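
A minimal sketch of the coercion the updated Delta.__init__ performs, based only on the hunk above (the tool-call id and function name are placeholders, not taken from this change):

from litellm.utils import Delta

delta = Delta(
    role="assistant",
    content=None,
    tool_calls=[
        {
            # "index" is deliberately omitted; per the hunk above it is backfilled with 0
            "id": "call_123",  # placeholder id
            "type": "function",
            "function": {"name": "get_current_weather", "arguments": "{}"},  # placeholder
        }
    ],
)
print(type(delta.tool_calls[0]).__name__)  # ChatCompletionDeltaToolCall
print(delta.tool_calls[0].index)  # 0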
|
||||
|
||||
|
@ -1634,7 +1640,7 @@ class Logging:
|
|||
verbose_logger.debug(
|
||||
"Async success callbacks: Got a complete streaming response"
|
||||
)
|
||||
self.model_call_details["complete_streaming_response"] = (
|
||||
self.model_call_details["async_complete_streaming_response"] = (
|
||||
complete_streaming_response
|
||||
)
|
||||
try:
|
||||
|
@ -1682,28 +1688,31 @@ class Logging:
|
|||
print_verbose("async success_callback: reaches cache for logging!")
|
||||
kwargs = self.model_call_details
|
||||
if self.stream:
|
||||
if "complete_streaming_response" not in kwargs:
|
||||
if "async_complete_streaming_response" not in kwargs:
|
||||
print_verbose(
|
||||
f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
|
||||
f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. Kwargs={kwargs}\n\n"
|
||||
)
|
||||
pass
|
||||
else:
|
||||
print_verbose(
|
||||
"async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
|
||||
"async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache"
|
||||
)
|
||||
result = kwargs["complete_streaming_response"]
|
||||
result = kwargs["async_complete_streaming_response"]
|
||||
# only add to cache once we have a complete streaming response
|
||||
litellm.cache.add_cache(result, **kwargs)
|
||||
if isinstance(callback, CustomLogger): # custom logger class
|
||||
print_verbose(
|
||||
f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}"
|
||||
f"Running Async success callback: {callback}; self.stream: {self.stream}; async_complete_streaming_response: {self.model_call_details.get('async_complete_streaming_response', None)} result={result}"
|
||||
)
|
||||
if self.stream == True:
|
||||
if "complete_streaming_response" in self.model_call_details:
|
||||
if (
|
||||
"async_complete_streaming_response"
|
||||
in self.model_call_details
|
||||
):
|
||||
await callback.async_log_success_event(
|
||||
kwargs=self.model_call_details,
|
||||
response_obj=self.model_call_details[
|
||||
"complete_streaming_response"
|
||||
"async_complete_streaming_response"
|
||||
],
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
|
@ -1724,14 +1733,18 @@ class Logging:
|
|||
)
|
||||
if callable(callback): # custom logger functions
|
||||
print_verbose(
|
||||
f"Making async function logging call - {self.model_call_details}"
|
||||
f"Making async function logging call for {callback}, result={result} - {self.model_call_details}"
|
||||
)
|
||||
if self.stream:
|
||||
if "complete_streaming_response" in self.model_call_details:
|
||||
if (
|
||||
"async_complete_streaming_response"
|
||||
in self.model_call_details
|
||||
):
|
||||
|
||||
await customLogger.async_log_event(
|
||||
kwargs=self.model_call_details,
|
||||
response_obj=self.model_call_details[
|
||||
"complete_streaming_response"
|
||||
"async_complete_streaming_response"
|
||||
],
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
|
@ -1752,14 +1765,17 @@ class Logging:
|
|||
if dynamoLogger is None:
|
||||
dynamoLogger = DyanmoDBLogger()
|
||||
if self.stream:
|
||||
if "complete_streaming_response" in self.model_call_details:
|
||||
if (
|
||||
"async_complete_streaming_response"
|
||||
in self.model_call_details
|
||||
):
|
||||
print_verbose(
|
||||
"DynamoDB Logger: Got Stream Event - Completed Stream Response"
|
||||
)
|
||||
await dynamoLogger._async_log_event(
|
||||
kwargs=self.model_call_details,
|
||||
response_obj=self.model_call_details[
|
||||
"complete_streaming_response"
|
||||
"async_complete_streaming_response"
|
||||
],
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
|
@ -3713,6 +3729,54 @@ def completion_cost(
|
|||
raise e
|
||||
|
||||
|
||||
def supports_function_calling(model: str):
    """
    Check if the given model supports function calling and return a boolean value.

    Parameters:
    model (str): The model name to be checked.

    Returns:
    bool: True if the model supports function calling, False otherwise.

    Raises:
    Exception: If the given model is not found in model_prices_and_context_window.json.
    """
    if model in litellm.model_cost:
        model_info = litellm.model_cost[model]
        if model_info.get("supports_function_calling", False):
            return True
        return False
    else:
        raise Exception(
            f"Model not in model_prices_and_context_window.json. You passed model={model}."
        )


def supports_parallel_function_calling(model: str):
    """
    Check if the given model supports parallel function calling and return True if it does, False otherwise.

    Parameters:
    model (str): The model to check for support of parallel function calling.

    Returns:
    bool: True if the model supports parallel function calling, False otherwise.

    Raises:
    Exception: If the model is not found in the model_cost dictionary.
    """
    if model in litellm.model_cost:
        model_info = litellm.model_cost[model]
        if model_info.get("supports_parallel_function_calling", False):
            return True
        return False
    else:
        raise Exception(
            f"Model not in model_prices_and_context_window.json. You passed model={model}."
        )
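
Note: as the two helpers above show, a model that is missing from model_prices_and_context_window.json raises instead of returning False. A minimal guard sketch (the model name below is a made-up placeholder, not from this change):

import litellm

try:
    litellm.supports_function_calling(model="my-unregistered-model")  # placeholder name
except Exception as e:
    # unknown models raise rather than silently returning False
    print(f"model not found in model_prices_and_context_window.json: {e}")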
|
||||
|
||||
|
||||
####### HELPER FUNCTIONS ################
|
||||
def register_model(model_cost: Union[str, dict]):
|
||||
"""
|
||||
|
@ -4041,6 +4105,7 @@ def get_optional_params(
|
|||
and custom_llm_provider != "vertex_ai"
|
||||
and custom_llm_provider != "anyscale"
|
||||
and custom_llm_provider != "together_ai"
|
||||
and custom_llm_provider != "mistral"
|
||||
):
|
||||
if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
|
||||
# ollama actually supports json output
|
||||
|
@ -4711,7 +4776,14 @@ def get_optional_params(
|
|||
if max_tokens:
|
||||
optional_params["max_tokens"] = max_tokens
|
||||
elif custom_llm_provider == "mistral":
|
||||
supported_params = ["temperature", "top_p", "stream", "max_tokens"]
|
||||
supported_params = [
|
||||
"temperature",
|
||||
"top_p",
|
||||
"stream",
|
||||
"max_tokens",
|
||||
"tools",
|
||||
"tool_choice",
|
||||
]
|
||||
_check_valid_arg(supported_params=supported_params)
|
||||
if temperature is not None:
|
||||
optional_params["temperature"] = temperature
|
||||
|
@ -4721,6 +4793,10 @@ def get_optional_params(
|
|||
optional_params["stream"] = stream
|
||||
if max_tokens is not None:
|
||||
optional_params["max_tokens"] = max_tokens
|
||||
if tools is not None:
|
||||
optional_params["tools"] = tools
|
||||
if tool_choice is not None:
|
||||
optional_params["tool_choice"] = tool_choice
|
||||
|
||||
# check safe_mode, random_seed: https://docs.mistral.ai/api/#operation/createChatCompletion
|
||||
safe_mode = passed_params.pop("safe_mode", None)
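
With the hunk above, tools and tool_choice are now forwarded for the mistral provider. A minimal sketch of a call that exercises this path; the model name and tool schema are assumptions for illustration, not taken from this change, and the call needs MISTRAL_API_KEY set:

import litellm

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",  # hypothetical tool
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }
]

response = litellm.completion(
    model="mistral/mistral-medium",  # assumed model name
    messages=[{"role": "user", "content": "What's the weather in Boston?"}],
    tools=tools,
    tool_choice="auto",
)
# may be None if the model answered directly instead of calling the tool
print(response.choices[0].message.tool_calls)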
|
||||
|
@ -6945,7 +7021,7 @@ def exception_type(
|
|||
if "500 An internal error has occurred." in error_str:
|
||||
exception_mapping_worked = True
|
||||
raise APIError(
|
||||
status_code=original_exception.status_code,
|
||||
status_code=getattr(original_exception, "status_code", 500),
|
||||
message=f"PalmException - {original_exception.message}",
|
||||
llm_provider="palm",
|
||||
model=model,
|
||||
|
@ -8728,7 +8804,7 @@ class CustomStreamWrapper:
|
|||
or original_chunk.choices[0].delta.tool_calls is not None
|
||||
):
|
||||
try:
|
||||
delta = dict(original_chunk.choices[0].delta)
|
||||
delta = original_chunk.choices[0].delta
|
||||
model_response.system_fingerprint = (
|
||||
original_chunk.system_fingerprint
|
||||
)
|
||||
|
@ -8763,7 +8839,9 @@ class CustomStreamWrapper:
|
|||
is None
|
||||
):
|
||||
t.function.arguments = ""
|
||||
model_response.choices[0].delta = Delta(**delta)
|
||||
_json_delta = delta.model_dump()
|
||||
print_verbose(f"_json_delta: {_json_delta}")
|
||||
model_response.choices[0].delta = Delta(**_json_delta)
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
model_response.choices[0].delta = Delta()
|
||||
|
|
|
@@ -6,7 +6,8 @@
        "input_cost_per_token": 0.00003,
        "output_cost_per_token": 0.00006,
        "litellm_provider": "openai",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "gpt-4-turbo-preview": {
        "max_tokens": 8192,

@@ -15,7 +16,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "openai",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "gpt-4-0314": {
        "max_tokens": 8192,

@@ -33,7 +36,8 @@
        "input_cost_per_token": 0.00003,
        "output_cost_per_token": 0.00006,
        "litellm_provider": "openai",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "gpt-4-32k": {
        "max_tokens": 32768,

@@ -69,7 +73,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "openai",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "gpt-4-0125-preview": {
        "max_tokens": 128000,

@@ -78,7 +84,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "openai",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "gpt-4-vision-preview": {
        "max_tokens": 128000,

@@ -105,7 +113,8 @@
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
        "litellm_provider": "openai",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "gpt-3.5-turbo-0301": {
        "max_tokens": 4097,

@@ -123,7 +132,8 @@
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
        "litellm_provider": "openai",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "gpt-3.5-turbo-1106": {
        "max_tokens": 16385,

@@ -132,7 +142,9 @@
        "input_cost_per_token": 0.0000010,
        "output_cost_per_token": 0.0000020,
        "litellm_provider": "openai",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "gpt-3.5-turbo-0125": {
        "max_tokens": 16385,

@@ -141,7 +153,9 @@
        "input_cost_per_token": 0.0000005,
        "output_cost_per_token": 0.0000015,
        "litellm_provider": "openai",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "gpt-3.5-turbo-16k": {
        "max_tokens": 16385,

@@ -286,7 +300,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "azure",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "azure/gpt-4-1106-preview": {
        "max_tokens": 128000,

@@ -295,7 +311,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "azure",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "azure/gpt-4-0613": {
        "max_tokens": 8192,

@@ -304,7 +322,8 @@
        "input_cost_per_token": 0.00003,
        "output_cost_per_token": 0.00006,
        "litellm_provider": "azure",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "azure/gpt-4-32k-0613": {
        "max_tokens": 32768,

@@ -331,7 +350,8 @@
        "input_cost_per_token": 0.00003,
        "output_cost_per_token": 0.00006,
        "litellm_provider": "azure",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "azure/gpt-4-turbo": {
        "max_tokens": 128000,

@@ -340,7 +360,9 @@
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "litellm_provider": "azure",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "azure/gpt-4-turbo-vision-preview": {
        "max_tokens": 128000,

@@ -358,7 +380,8 @@
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000004,
        "litellm_provider": "azure",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "azure/gpt-35-turbo-1106": {
        "max_tokens": 16384,

@@ -367,7 +390,20 @@
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
        "litellm_provider": "azure",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "azure/gpt-35-turbo-0125": {
        "max_tokens": 16384,
        "max_input_tokens": 16384,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.0000005,
        "output_cost_per_token": 0.0000015,
        "litellm_provider": "azure",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "azure/gpt-35-turbo-16k": {
        "max_tokens": 16385,

@@ -385,7 +421,8 @@
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
        "litellm_provider": "azure",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "azure/ada": {
        "max_tokens": 8191,

@@ -514,11 +551,12 @@
        "mode": "chat"
    },
    "mistral/mistral-large-latest": {
        "max_tokens": 8192,
        "max_tokens": 32000,
        "input_cost_per_token": 0.000008,
        "output_cost_per_token": 0.000024,
        "litellm_provider": "mistral",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "mistral/mistral-embed": {
        "max_tokens": 8192,

@@ -676,7 +714,8 @@
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "gemini-1.5-pro": {
        "max_tokens": 8192,

@@ -687,6 +726,15 @@
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-preview-0215": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-pro-vision": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,

@@ -1729,6 +1777,23 @@
        "output_cost_per_token": 0.0000009,
        "litellm_provider": "together_ai"
    },
    "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
        "input_cost_per_token": 0.0000006,
        "output_cost_per_token": 0.0000006,
        "litellm_provider": "together_ai",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
        "litellm_provider": "together_ai",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "together_ai/togethercomputer/CodeLlama-34b-Instruct": {
        "litellm_provider": "together_ai",
        "supports_function_calling": true,
        "supports_parallel_function_calling": true
    },
    "ollama/llama2": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.0,

@@ -1981,7 +2046,16 @@
        "input_cost_per_token": 0.00000015,
        "output_cost_per_token": 0.00000015,
        "litellm_provider": "anyscale",
        "mode": "chat"
        "mode": "chat",
        "supports_function_calling": true
    },
    "anyscale/Mixtral-8x7B-Instruct-v0.1": {
        "max_tokens": 16384,
        "input_cost_per_token": 0.00000015,
        "output_cost_per_token": 0.00000015,
        "litellm_provider": "anyscale",
        "mode": "chat",
        "supports_function_calling": true
    },
    "anyscale/HuggingFaceH4/zephyr-7b-beta": {
        "max_tokens": 16384,

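The model-map entries above now carry `supports_function_calling` / `supports_parallel_function_calling` flags. One way to read them at runtime is through `litellm.model_cost`, litellm's in-memory copy of the model map edited above; key names below are taken directly from the diff:

```python
import litellm

# litellm.model_cost mirrors the JSON model map shown above.
entry = litellm.model_cost.get("gpt-3.5-turbo-0125", {})

print(entry.get("supports_function_calling", False))           # True per the diff above
print(entry.get("supports_parallel_function_calling", False))  # True per the diff above

# Entries that were not updated simply omit the key, so .get() with a
# default keeps the lookup safe.
print(litellm.model_cost.get("gemini-pro-vision", {}).get("supports_function_calling", False))
```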
@@ -40,6 +40,8 @@ litellm_settings:
  budget_duration: 30d

general_settings:
  master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)
  proxy_budget_rescheduler_min_time: 30
  proxy_budget_rescheduler_max_time: 60
  # database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy

environment_variables:

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.27.12"
version = "1.27.15"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"

@@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.27.12"
version = "1.27.15"
version_files = [
    "pyproject.toml:^version"
]

@@ -10,6 +10,7 @@ gunicorn==21.2.0 # server dep
boto3==1.34.34 # aws bedrock/sagemaker calls
redis==5.0.0 # caching
numpy==1.24.3 # semantic caching
pandas==2.1.1 # for viewing clickhouse spend analytics
prisma==0.11.0 # for db
mangum==0.17.0 # for aws lambda functions
google-generativeai==0.3.2 # for vertex ai calls

@@ -449,7 +449,7 @@ async def test_key_with_budgets():
        reset_at_init_value = key_info["info"]["budget_reset_at"]
        reset_at_new_value = None
        i = 0
        await asyncio.sleep(610)
        await asyncio.sleep(120)
        while i < 3:
            key_info = await get_key_info(session=session, get_key=key, call_key=key)
            reset_at_new_value = key_info["info"]["budget_reset_at"]

@@ -490,6 +490,7 @@ async def test_key_crossing_budget():
            assert "ExceededTokenBudget: Current spend for token:" in str(e)


@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_key_info_spend_values_sagemaker():
    """

File diff suppressed because one or more lines are too long
@@ -313,6 +313,7 @@ export const userSpendLogsCall = async (
  endTime: String
) => {
  try {
    console.log(`user role in spend logs call: ${userRole}`);
    let url = proxyBaseUrl ? `${proxyBaseUrl}/spend/logs` : `/spend/logs`;
    if (userRole == "App Owner") {
      url = `${url}/?user_id=${userID}&start_date=${startTime}&end_date=${endTime}`;

@@ -343,6 +344,96 @@ export const userSpendLogsCall = async (
  }
};

export const adminSpendLogsCall = async (accessToken: String) => {
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/global/spend/logs`
      : `/global/spend/logs`;

    message.info("Making spend logs request");
    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });
    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    console.log(data);
    message.success("Spend Logs received");
    return data;
  } catch (error) {
    console.error("Failed to create key:", error);
    throw error;
  }
};

export const adminTopKeysCall = async (accessToken: String) => {
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/global/spend/keys?limit=5`
      : `/global/spend/keys?limit=5`;

    message.info("Making spend keys request");
    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });
    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    console.log(data);
    message.success("Spend Logs received");
    return data;
  } catch (error) {
    console.error("Failed to create key:", error);
    throw error;
  }
};

export const adminTopModelsCall = async (accessToken: String) => {
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/global/spend/models?limit=5`
      : `/global/spend/models?limit=5`;

    message.info("Making spend models request");
    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });
    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    console.log(data);
    message.success("Spend Logs received");
    return data;
  } catch (error) {
    console.error("Failed to create key:", error);
    throw error;
  }
};

export const keyInfoCall = async (accessToken: String, keys: String[]) => {
  try {
    let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;

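The new `adminSpendLogsCall`, `adminTopKeysCall`, and `adminTopModelsCall` helpers simply GET the proxy's global spend endpoints with a bearer token. The same endpoints can be exercised outside the UI; a rough sketch with `requests`, assuming the proxy listens on localhost:4000 and `sk-1234` is the master key from the sample config above (adjust both to your deployment):

```python
import requests

PROXY_BASE = "http://localhost:4000"  # assumed local proxy address
HEADERS = {
    "Authorization": "Bearer sk-1234",  # master key from the sample config above
    "Content-Type": "application/json",
}

# Same endpoints the admin UI's networking helpers call.
for path in ("/global/spend/logs", "/global/spend/keys?limit=5", "/global/spend/models?limit=5"):
    resp = requests.get(f"{PROXY_BASE}{path}", headers=HEADERS)
    resp.raise_for_status()
    data = resp.json()
    print(path, "->", data[:3] if isinstance(data, list) else data)
```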
@@ -2,7 +2,13 @@ import { BarChart, Card, Title } from "@tremor/react";

import React, { useState, useEffect } from "react";
import { Grid, Col, Text, LineChart } from "@tremor/react";
import { userSpendLogsCall, keyInfoCall } from "./networking";
import {
  userSpendLogsCall,
  keyInfoCall,
  adminSpendLogsCall,
  adminTopKeysCall,
  adminTopModelsCall,
} from "./networking";
import { start } from "repl";

interface UsagePageProps {

@@ -164,29 +170,61 @@ const UsagePage: React.FC<UsagePageProps> = ({
    if (accessToken && token && userRole && userID) {
      const fetchData = async () => {
        try {
          await userSpendLogsCall(
            accessToken,
            token,
            userRole,
            userID,
            startTime,
            endTime
          ).then(async (response) => {
            const topKeysResponse = await keyInfoCall(
              accessToken,
              getTopKeys(response)
            );
            const filtered_keys = topKeysResponse["info"].map((k: any) => ({
              key: (k["key_name"] || k["key_alias"] || k["token"]).substring(
          /**
           * If user is Admin - query the global views endpoints
           * If user is App Owner - use the normal spend logs call
           */
          console.log(`user role: ${userRole}`);
          if (userRole == "Admin") {
            const overall_spend = await adminSpendLogsCall(accessToken);
            setKeySpendData(overall_spend);
            const top_keys = await adminTopKeysCall(accessToken);
            const filtered_keys = top_keys.map((k: any) => ({
              key: (k["key_name"] || k["key_alias"] || k["api_key"]).substring(
                0,
                7
              ),
              spend: k["spend"],
              spend: k["total_spend"],
            }));
            setTopKeys(filtered_keys);
            setTopUsers(getTopUsers(response));
            setKeySpendData(response);
          });
            const top_models = await adminTopModelsCall(accessToken);
          } else if (userRole == "App Owner") {
            await userSpendLogsCall(
              accessToken,
              token,
              userRole,
              userID,
              startTime,
              endTime
            ).then(async (response) => {
              console.log("result from spend logs call", response);
              if ("daily_spend" in response) {
                // this is from clickhouse analytics
                //
                let daily_spend = response["daily_spend"];
                console.log("daily spend", daily_spend);
                setKeySpendData(daily_spend);
                let topApiKeys = response.top_api_keys;
                setTopKeys(topApiKeys);
              } else {
                const topKeysResponse = await keyInfoCall(
                  accessToken,
                  getTopKeys(response)
                );
                const filtered_keys = topKeysResponse["info"].map((k: any) => ({
                  key: (
                    k["key_name"] ||
                    k["key_alias"] ||
                    k["token"]
                  ).substring(0, 7),
                  spend: k["spend"],
                }));
                setTopKeys(filtered_keys);
                setTopUsers(getTopUsers(response));
                setKeySpendData(response);
              }
            });
          }
        } catch (error) {
          console.error("There was an error fetching the data", error);
          // Optionally, update your UI to reflect the error state here as well

@@ -210,7 +248,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
            valueFormatter={valueFormatter}
            yAxisWidth={100}
            tickGap={5}
            customTooltip={customTooltip}
            // customTooltip={customTooltip}
          />
        </Card>
      </Col>