forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_selective_access_ui
This commit is contained in:
commit
35a22e2247
55 changed files with 2284 additions and 1102 deletions
|
@ -130,6 +130,7 @@ jobs:
|
||||||
pip install "langfuse>=2.0.0"
|
pip install "langfuse>=2.0.0"
|
||||||
pip install numpydoc
|
pip install numpydoc
|
||||||
pip install prisma
|
pip install prisma
|
||||||
|
pip install fastapi
|
||||||
pip install "httpx==0.24.1"
|
pip install "httpx==0.24.1"
|
||||||
pip install "gunicorn==21.2.0"
|
pip install "gunicorn==21.2.0"
|
||||||
pip install "anyio==3.7.1"
|
pip install "anyio==3.7.1"
|
||||||
|
|
|
@ -1,18 +1,25 @@
|
||||||
# Function Calling
|
# Function Calling
|
||||||
Function calling is supported with the following models on OpenAI, Azure OpenAI
|
|
||||||
|
|
||||||
- gpt-4
|
## Checking if a model supports function calling
|
||||||
- gpt-4-1106-preview
|
|
||||||
- gpt-4-0613
|
|
||||||
- gpt-3.5-turbo
|
|
||||||
- gpt-3.5-turbo-1106
|
|
||||||
- gpt-3.5-turbo-0613
|
|
||||||
- Non OpenAI LLMs (litellm adds the function call to the prompt for these llms)
|
|
||||||
|
|
||||||
In addition, parallel function calls is supported on the following models:
|
Use `litellm.supports_function_calling(model="")` -> returns `True` if model supports Function calling, `False` if not
|
||||||
- gpt-4-1106-preview
|
|
||||||
- gpt-3.5-turbo-1106
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
|
||||||
|
assert litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
|
||||||
|
assert litellm.supports_function_calling(model="palm/chat-bison") == False
|
||||||
|
assert litellm.supports_function_calling(model="ollama/llama2") == False
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Checking if a model supports parallel function calling
|
||||||
|
|
||||||
|
Use `litellm.supports_parallel_function_calling(model="")` -> returns `True` if model supports parallel function calling, `False` if not
|
||||||
|
|
||||||
|
```python
|
||||||
|
assert litellm.supports_parallel_function_calling(model="gpt-4-turbo-preview") == True
|
||||||
|
assert litellm.supports_parallel_function_calling(model="gpt-4") == False
|
||||||
|
```
|
||||||
## Parallel Function calling
|
## Parallel Function calling
|
||||||
Parallel function calling is the model's ability to perform multiple function calls together, allowing the effects and results of these function calls to be resolved in parallel
|
Parallel function calling is the model's ability to perform multiple function calls together, allowing the effects and results of these function calls to be resolved in parallel
|
||||||
|
|
||||||
|
|
|
@ -291,7 +291,6 @@ Here's an example of using a bedrock model with LiteLLM
|
||||||
| Anthropic Claude-V2.1 | `completion(model='bedrock/anthropic.claude-v2:1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
| Anthropic Claude-V2.1 | `completion(model='bedrock/anthropic.claude-v2:1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
||||||
| Anthropic Claude-V2 | `completion(model='bedrock/anthropic.claude-v2', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
| Anthropic Claude-V2 | `completion(model='bedrock/anthropic.claude-v2', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
||||||
| Anthropic Claude-Instant V1 | `completion(model='bedrock/anthropic.claude-instant-v1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
| Anthropic Claude-Instant V1 | `completion(model='bedrock/anthropic.claude-instant-v1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
||||||
| Anthropic Claude-V1 | `completion(model='bedrock/anthropic.claude-v1', messages=messages)` | `os.environ['ANTHROPIC_ACCESS_KEY_ID']`, `os.environ['ANTHROPIC_SECRET_ACCESS_KEY']` |
|
|
||||||
| Amazon Titan Lite | `completion(model='bedrock/amazon.titan-text-lite-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
| Amazon Titan Lite | `completion(model='bedrock/amazon.titan-text-lite-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
||||||
| Amazon Titan Express | `completion(model='bedrock/amazon.titan-text-express-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
| Amazon Titan Express | `completion(model='bedrock/amazon.titan-text-express-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
||||||
| Cohere Command | `completion(model='bedrock/cohere.command-text-v14', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
| Cohere Command | `completion(model='bedrock/cohere.command-text-v14', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# VertexAI - Google [Gemini, Model Garden]
|
# VertexAI - Google [Gemini, Model Garden]
|
||||||
|
|
||||||
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_VertextAI_Example.ipynb">
|
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_VertextAI_Example.ipynb">
|
||||||
|
@ -22,8 +25,36 @@ response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "co
|
||||||
|
|
||||||
## OpenAI Proxy Usage
|
## OpenAI Proxy Usage
|
||||||
|
|
||||||
|
Here's how to use Vertex AI with the LiteLLM Proxy Server
|
||||||
|
|
||||||
1. Modify the config.yaml
|
1. Modify the config.yaml
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
|
||||||
|
<TabItem value="completion_param" label="Different location per model">
|
||||||
|
|
||||||
|
Use this when you need to set a different location for each vertex model
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gemini-vision
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/gemini-1.0-pro-vision-001
|
||||||
|
vertex_project: "project-id"
|
||||||
|
vertex_location: "us-central1"
|
||||||
|
- model_name: gemini-vision
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/gemini-1.0-pro-vision-001
|
||||||
|
vertex_project: "project-id2"
|
||||||
|
vertex_location: "us-east"
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="litellm_param" label="One location all vertex models">
|
||||||
|
|
||||||
|
Use this when you have one vertex location for all models
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
vertex_project: "hardy-device-38811" # Your Project ID
|
vertex_project: "hardy-device-38811" # Your Project ID
|
||||||
|
@ -35,6 +66,10 @@ model_list:
|
||||||
model: gemini-pro
|
model: gemini-pro
|
||||||
```
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
2. Start the proxy
|
2. Start the proxy
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
44
docs/my-website/docs/proxy/metrics.md
Normal file
44
docs/my-website/docs/proxy/metrics.md
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
# 💸 GET Daily Spend, Usage Metrics
|
||||||
|
|
||||||
|
## Request Format
|
||||||
|
```shell
|
||||||
|
curl -X GET "http://0.0.0.0:4000/daily_metrics" -H "Authorization: Bearer sk-1234"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Response format
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
daily_spend = [
|
||||||
|
{
|
||||||
|
"daily_spend": 7.9261938052047e+16,
|
||||||
|
"day": "2024-02-01T00:00:00",
|
||||||
|
"spend_per_model": {"azure/gpt-4": 7.9261938052047e+16},
|
||||||
|
"spend_per_api_key": {
|
||||||
|
"76": 914495704992000.0,
|
||||||
|
"12": 905726697912000.0,
|
||||||
|
"71": 866312628003000.0,
|
||||||
|
"28": 865461799332000.0,
|
||||||
|
"13": 859151538396000.0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"daily_spend": 7.938489251309491e+16,
|
||||||
|
"day": "2024-02-02T00:00:00",
|
||||||
|
"spend_per_model": {"gpt-3.5": 7.938489251309491e+16},
|
||||||
|
"spend_per_api_key": {
|
||||||
|
"91": 896805036036000.0,
|
||||||
|
"78": 889692646082000.0,
|
||||||
|
"49": 885386687861000.0,
|
||||||
|
"28": 873869890984000.0,
|
||||||
|
"56": 867398637692000.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
],
|
||||||
|
total_spend = 200,
|
||||||
|
top_models = {"gpt4": 0.2, "vertexai/gemini-pro":10},
|
||||||
|
top_api_keys = {"899922": 0.9, "838hcjd999seerr88": 20}
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
```
|
|
@ -40,6 +40,7 @@ const sidebars = {
|
||||||
"proxy/virtual_keys",
|
"proxy/virtual_keys",
|
||||||
"proxy/users",
|
"proxy/users",
|
||||||
"proxy/ui",
|
"proxy/ui",
|
||||||
|
"proxy/metrics",
|
||||||
"proxy/model_management",
|
"proxy/model_management",
|
||||||
"proxy/health",
|
"proxy/health",
|
||||||
"proxy/debugging",
|
"proxy/debugging",
|
||||||
|
|
|
@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
|
||||||
"log_count": num_rows,
|
"log_count": num_rows,
|
||||||
}
|
}
|
||||||
return response_data
|
return response_data
|
||||||
|
|
||||||
|
|
||||||
|
def _create_clickhouse_material_views(client=None, table_names=[]):
|
||||||
|
# Create Materialized Views if they don't exist
|
||||||
|
# Materialized Views send new inserted rows to the aggregate tables
|
||||||
|
|
||||||
|
verbose_logger.debug("Clickhouse: Creating Materialized Views")
|
||||||
|
if "daily_aggregated_spend_per_model_mv" not in table_names:
|
||||||
|
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
|
||||||
|
client.command(
|
||||||
|
"""
|
||||||
|
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
|
||||||
|
TO daily_aggregated_spend_per_model
|
||||||
|
AS
|
||||||
|
SELECT
|
||||||
|
toDate(startTime) as day,
|
||||||
|
sumState(spend) AS DailySpend,
|
||||||
|
model as model
|
||||||
|
FROM spend_logs
|
||||||
|
GROUP BY
|
||||||
|
day, model
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
if "daily_aggregated_spend_per_api_key_mv" not in table_names:
|
||||||
|
verbose_logger.debug(
|
||||||
|
"Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
|
||||||
|
)
|
||||||
|
client.command(
|
||||||
|
"""
|
||||||
|
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
|
||||||
|
TO daily_aggregated_spend_per_api_key
|
||||||
|
AS
|
||||||
|
SELECT
|
||||||
|
toDate(startTime) as day,
|
||||||
|
sumState(spend) AS DailySpend,
|
||||||
|
api_key as api_key
|
||||||
|
FROM spend_logs
|
||||||
|
GROUP BY
|
||||||
|
day, api_key
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
if "daily_aggregated_spend_per_user_mv" not in table_names:
|
||||||
|
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
|
||||||
|
client.command(
|
||||||
|
"""
|
||||||
|
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
|
||||||
|
TO daily_aggregated_spend_per_user
|
||||||
|
AS
|
||||||
|
SELECT
|
||||||
|
toDate(startTime) as day,
|
||||||
|
sumState(spend) AS DailySpend,
|
||||||
|
user as user
|
||||||
|
FROM spend_logs
|
||||||
|
GROUP BY
|
||||||
|
day, user
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
if "daily_aggregated_spend_mv" not in table_names:
|
||||||
|
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
|
||||||
|
client.command(
|
||||||
|
"""
|
||||||
|
CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
|
||||||
|
TO daily_aggregated_spend
|
||||||
|
AS
|
||||||
|
SELECT
|
||||||
|
toDate(startTime) as day,
|
||||||
|
sumState(spend) AS DailySpend
|
||||||
|
FROM spend_logs
|
||||||
|
GROUP BY
|
||||||
|
day
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
|
||||||
|
# Basic Logging works without this - this is only used for low latency reporting apis
|
||||||
|
verbose_logger.debug("Clickhouse: Creating Aggregate Tables")
|
||||||
|
|
||||||
|
# Create Aggregeate Tables if they don't exist
|
||||||
|
if "daily_aggregated_spend_per_model" not in table_names:
|
||||||
|
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
|
||||||
|
client.command(
|
||||||
|
"""
|
||||||
|
CREATE TABLE daily_aggregated_spend_per_model
|
||||||
|
(
|
||||||
|
`day` Date,
|
||||||
|
`DailySpend` AggregateFunction(sum, Float64),
|
||||||
|
`model` String
|
||||||
|
)
|
||||||
|
ENGINE = SummingMergeTree()
|
||||||
|
ORDER BY (day, model);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
if "daily_aggregated_spend_per_api_key" not in table_names:
|
||||||
|
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
|
||||||
|
client.command(
|
||||||
|
"""
|
||||||
|
CREATE TABLE daily_aggregated_spend_per_api_key
|
||||||
|
(
|
||||||
|
`day` Date,
|
||||||
|
`DailySpend` AggregateFunction(sum, Float64),
|
||||||
|
`api_key` String
|
||||||
|
)
|
||||||
|
ENGINE = SummingMergeTree()
|
||||||
|
ORDER BY (day, api_key);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
if "daily_aggregated_spend_per_user" not in table_names:
|
||||||
|
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
|
||||||
|
client.command(
|
||||||
|
"""
|
||||||
|
CREATE TABLE daily_aggregated_spend_per_user
|
||||||
|
(
|
||||||
|
`day` Date,
|
||||||
|
`DailySpend` AggregateFunction(sum, Float64),
|
||||||
|
`user` String
|
||||||
|
)
|
||||||
|
ENGINE = SummingMergeTree()
|
||||||
|
ORDER BY (day, user);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
if "daily_aggregated_spend" not in table_names:
|
||||||
|
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
|
||||||
|
client.command(
|
||||||
|
"""
|
||||||
|
CREATE TABLE daily_aggregated_spend
|
||||||
|
(
|
||||||
|
`day` Date,
|
||||||
|
`DailySpend` AggregateFunction(sum, Float64),
|
||||||
|
)
|
||||||
|
ENGINE = SummingMergeTree()
|
||||||
|
ORDER BY (day);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
|
@ -549,6 +549,8 @@ from .utils import (
|
||||||
token_counter,
|
token_counter,
|
||||||
cost_per_token,
|
cost_per_token,
|
||||||
completion_cost,
|
completion_cost,
|
||||||
|
supports_function_calling,
|
||||||
|
supports_parallel_function_calling,
|
||||||
get_litellm_params,
|
get_litellm_params,
|
||||||
Logging,
|
Logging,
|
||||||
acreate,
|
acreate,
|
||||||
|
|
|
@ -27,6 +27,151 @@ import litellm, uuid
|
||||||
from litellm._logging import print_verbose, verbose_logger
|
from litellm._logging import print_verbose, verbose_logger
|
||||||
|
|
||||||
|
|
||||||
|
def create_client():
|
||||||
|
try:
|
||||||
|
import clickhouse_connect
|
||||||
|
|
||||||
|
port = os.getenv("CLICKHOUSE_PORT")
|
||||||
|
clickhouse_host = os.getenv("CLICKHOUSE_HOST")
|
||||||
|
if clickhouse_host is not None:
|
||||||
|
verbose_logger.debug("setting up clickhouse")
|
||||||
|
if port is not None and isinstance(port, str):
|
||||||
|
port = int(port)
|
||||||
|
|
||||||
|
client = clickhouse_connect.get_client(
|
||||||
|
host=os.getenv("CLICKHOUSE_HOST"),
|
||||||
|
port=port,
|
||||||
|
username=os.getenv("CLICKHOUSE_USERNAME"),
|
||||||
|
password=os.getenv("CLICKHOUSE_PASSWORD"),
|
||||||
|
)
|
||||||
|
return client
|
||||||
|
else:
|
||||||
|
raise Exception("Clickhouse: Clickhouse host not set")
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError(f"Clickhouse: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def build_daily_metrics():
|
||||||
|
click_house_client = create_client()
|
||||||
|
|
||||||
|
# get daily spend
|
||||||
|
daily_spend = click_house_client.query_df(
|
||||||
|
"""
|
||||||
|
SELECT sumMerge(DailySpend) as daily_spend, day FROM daily_aggregated_spend GROUP BY day
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
# get daily spend per model
|
||||||
|
daily_spend_per_model = click_house_client.query_df(
|
||||||
|
"""
|
||||||
|
SELECT sumMerge(DailySpend) as daily_spend, day, model FROM daily_aggregated_spend_per_model GROUP BY day, model
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
new_df = daily_spend_per_model.to_dict(orient="records")
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
df = pd.DataFrame(new_df)
|
||||||
|
# Group by 'day' and create a dictionary for each group
|
||||||
|
result_dict = {}
|
||||||
|
for day, group in df.groupby("day"):
|
||||||
|
models = group["model"].tolist()
|
||||||
|
spend = group["daily_spend"].tolist()
|
||||||
|
spend_per_model = {model: spend for model, spend in zip(models, spend)}
|
||||||
|
result_dict[day] = spend_per_model
|
||||||
|
|
||||||
|
# Display the resulting dictionary
|
||||||
|
|
||||||
|
# get daily spend per API key
|
||||||
|
daily_spend_per_api_key = click_house_client.query_df(
|
||||||
|
"""
|
||||||
|
SELECT
|
||||||
|
daily_spend,
|
||||||
|
day,
|
||||||
|
api_key
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
sumMerge(DailySpend) as daily_spend,
|
||||||
|
day,
|
||||||
|
api_key,
|
||||||
|
RANK() OVER (PARTITION BY day ORDER BY sumMerge(DailySpend) DESC) as spend_rank
|
||||||
|
FROM
|
||||||
|
daily_aggregated_spend_per_api_key
|
||||||
|
GROUP BY
|
||||||
|
day,
|
||||||
|
api_key
|
||||||
|
) AS ranked_api_keys
|
||||||
|
WHERE
|
||||||
|
spend_rank <= 5
|
||||||
|
AND day IS NOT NULL
|
||||||
|
ORDER BY
|
||||||
|
day,
|
||||||
|
daily_spend DESC
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
new_df = daily_spend_per_api_key.to_dict(orient="records")
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
df = pd.DataFrame(new_df)
|
||||||
|
# Group by 'day' and create a dictionary for each group
|
||||||
|
api_key_result_dict = {}
|
||||||
|
for day, group in df.groupby("day"):
|
||||||
|
api_keys = group["api_key"].tolist()
|
||||||
|
spend = group["daily_spend"].tolist()
|
||||||
|
spend_per_api_key = {api_key: spend for api_key, spend in zip(api_keys, spend)}
|
||||||
|
api_key_result_dict[day] = spend_per_api_key
|
||||||
|
|
||||||
|
# Display the resulting dictionary
|
||||||
|
|
||||||
|
# Calculate total spend across all days
|
||||||
|
total_spend = daily_spend["daily_spend"].sum()
|
||||||
|
|
||||||
|
# Identify top models and top API keys with the highest spend across all days
|
||||||
|
top_models = {}
|
||||||
|
top_api_keys = {}
|
||||||
|
|
||||||
|
for day, spend_per_model in result_dict.items():
|
||||||
|
for model, model_spend in spend_per_model.items():
|
||||||
|
if model not in top_models or model_spend > top_models[model]:
|
||||||
|
top_models[model] = model_spend
|
||||||
|
|
||||||
|
for day, spend_per_api_key in api_key_result_dict.items():
|
||||||
|
for api_key, api_key_spend in spend_per_api_key.items():
|
||||||
|
if api_key not in top_api_keys or api_key_spend > top_api_keys[api_key]:
|
||||||
|
top_api_keys[api_key] = api_key_spend
|
||||||
|
|
||||||
|
# for each day in daily spend, look up the day in result_dict and api_key_result_dict
|
||||||
|
# Assuming daily_spend DataFrame has 'day' column
|
||||||
|
result = []
|
||||||
|
for index, row in daily_spend.iterrows():
|
||||||
|
day = row["day"]
|
||||||
|
data_day = row.to_dict()
|
||||||
|
|
||||||
|
# Look up in result_dict
|
||||||
|
if day in result_dict:
|
||||||
|
spend_per_model = result_dict[day]
|
||||||
|
# Assuming there is a column named 'model' in daily_spend
|
||||||
|
data_day["spend_per_model"] = spend_per_model # Assign 0 if model not found
|
||||||
|
|
||||||
|
# Look up in api_key_result_dict
|
||||||
|
if day in api_key_result_dict:
|
||||||
|
spend_per_api_key = api_key_result_dict[day]
|
||||||
|
# Assuming there is a column named 'api_key' in daily_spend
|
||||||
|
data_day["spend_per_api_key"] = spend_per_api_key
|
||||||
|
|
||||||
|
result.append(data_day)
|
||||||
|
|
||||||
|
data_to_return = {}
|
||||||
|
data_to_return["daily_spend"] = result
|
||||||
|
|
||||||
|
data_to_return["total_spend"] = total_spend
|
||||||
|
data_to_return["top_models"] = top_models
|
||||||
|
data_to_return["top_api_keys"] = top_api_keys
|
||||||
|
return data_to_return
|
||||||
|
|
||||||
|
|
||||||
|
# build_daily_metrics()
|
||||||
|
|
||||||
|
|
||||||
def _start_clickhouse():
|
def _start_clickhouse():
|
||||||
import clickhouse_connect
|
import clickhouse_connect
|
||||||
|
|
||||||
|
@ -86,6 +231,14 @@ def _start_clickhouse():
|
||||||
response = client.query("DESCRIBE default.spend_logs")
|
response = client.query("DESCRIBE default.spend_logs")
|
||||||
verbose_logger.debug(f"spend logs schema ={response.result_rows}")
|
verbose_logger.debug(f"spend logs schema ={response.result_rows}")
|
||||||
|
|
||||||
|
# RUN Enterprise Clickhouse Setup
|
||||||
|
# TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
|
||||||
|
from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
|
||||||
|
from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
|
||||||
|
|
||||||
|
_create_clickhouse_aggregate_tables(client=client, table_names=table_names)
|
||||||
|
_create_clickhouse_material_views(client=client, table_names=table_names)
|
||||||
|
|
||||||
|
|
||||||
class ClickhouseLogger:
|
class ClickhouseLogger:
|
||||||
# Class variables or attributes
|
# Class variables or attributes
|
||||||
|
|
|
@ -278,7 +278,11 @@ def completion(
|
||||||
import google.auth
|
import google.auth
|
||||||
|
|
||||||
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
|
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
|
||||||
|
print_verbose(
|
||||||
|
f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
|
||||||
|
)
|
||||||
creds, _ = google.auth.default(quota_project_id=vertex_project)
|
creds, _ = google.auth.default(quota_project_id=vertex_project)
|
||||||
|
print_verbose(f"VERTEX AI: creds={creds}")
|
||||||
vertexai.init(
|
vertexai.init(
|
||||||
project=vertex_project, location=vertex_location, credentials=creds
|
project=vertex_project, location=vertex_location, credentials=creds
|
||||||
)
|
)
|
||||||
|
|
|
@ -1467,12 +1467,14 @@ def completion(
|
||||||
response = model_response
|
response = model_response
|
||||||
elif custom_llm_provider == "vertex_ai":
|
elif custom_llm_provider == "vertex_ai":
|
||||||
vertex_ai_project = (
|
vertex_ai_project = (
|
||||||
optional_params.pop("vertex_ai_project", None)
|
optional_params.pop("vertex_project", None)
|
||||||
|
or optional_params.pop("vertex_ai_project", None)
|
||||||
or litellm.vertex_project
|
or litellm.vertex_project
|
||||||
or get_secret("VERTEXAI_PROJECT")
|
or get_secret("VERTEXAI_PROJECT")
|
||||||
)
|
)
|
||||||
vertex_ai_location = (
|
vertex_ai_location = (
|
||||||
optional_params.pop("vertex_ai_location", None)
|
optional_params.pop("vertex_location", None)
|
||||||
|
or optional_params.pop("vertex_ai_location", None)
|
||||||
or litellm.vertex_location
|
or litellm.vertex_location
|
||||||
or get_secret("VERTEXAI_LOCATION")
|
or get_secret("VERTEXAI_LOCATION")
|
||||||
)
|
)
|
||||||
|
@ -2566,12 +2568,14 @@ def embedding(
|
||||||
)
|
)
|
||||||
elif custom_llm_provider == "vertex_ai":
|
elif custom_llm_provider == "vertex_ai":
|
||||||
vertex_ai_project = (
|
vertex_ai_project = (
|
||||||
optional_params.pop("vertex_ai_project", None)
|
optional_params.pop("vertex_project", None)
|
||||||
|
or optional_params.pop("vertex_ai_project", None)
|
||||||
or litellm.vertex_project
|
or litellm.vertex_project
|
||||||
or get_secret("VERTEXAI_PROJECT")
|
or get_secret("VERTEXAI_PROJECT")
|
||||||
)
|
)
|
||||||
vertex_ai_location = (
|
vertex_ai_location = (
|
||||||
optional_params.pop("vertex_ai_location", None)
|
optional_params.pop("vertex_location", None)
|
||||||
|
or optional_params.pop("vertex_ai_location", None)
|
||||||
or litellm.vertex_location
|
or litellm.vertex_location
|
||||||
or get_secret("VERTEXAI_LOCATION")
|
or get_secret("VERTEXAI_LOCATION")
|
||||||
)
|
)
|
||||||
|
|
|
@ -6,7 +6,8 @@
|
||||||
"input_cost_per_token": 0.00003,
|
"input_cost_per_token": 0.00003,
|
||||||
"output_cost_per_token": 0.00006,
|
"output_cost_per_token": 0.00006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-4-turbo-preview": {
|
"gpt-4-turbo-preview": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
|
@ -15,7 +16,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-4-0314": {
|
"gpt-4-0314": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
|
@ -33,7 +36,8 @@
|
||||||
"input_cost_per_token": 0.00003,
|
"input_cost_per_token": 0.00003,
|
||||||
"output_cost_per_token": 0.00006,
|
"output_cost_per_token": 0.00006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-4-32k": {
|
"gpt-4-32k": {
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
|
@ -69,7 +73,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-4-0125-preview": {
|
"gpt-4-0125-preview": {
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
|
@ -78,7 +84,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-4-vision-preview": {
|
"gpt-4-vision-preview": {
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
|
@ -105,7 +113,8 @@
|
||||||
"input_cost_per_token": 0.0000015,
|
"input_cost_per_token": 0.0000015,
|
||||||
"output_cost_per_token": 0.000002,
|
"output_cost_per_token": 0.000002,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-0301": {
|
"gpt-3.5-turbo-0301": {
|
||||||
"max_tokens": 4097,
|
"max_tokens": 4097,
|
||||||
|
@ -123,7 +132,8 @@
|
||||||
"input_cost_per_token": 0.0000015,
|
"input_cost_per_token": 0.0000015,
|
||||||
"output_cost_per_token": 0.000002,
|
"output_cost_per_token": 0.000002,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-1106": {
|
"gpt-3.5-turbo-1106": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -132,7 +142,9 @@
|
||||||
"input_cost_per_token": 0.0000010,
|
"input_cost_per_token": 0.0000010,
|
||||||
"output_cost_per_token": 0.0000020,
|
"output_cost_per_token": 0.0000020,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-0125": {
|
"gpt-3.5-turbo-0125": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -141,7 +153,9 @@
|
||||||
"input_cost_per_token": 0.0000005,
|
"input_cost_per_token": 0.0000005,
|
||||||
"output_cost_per_token": 0.0000015,
|
"output_cost_per_token": 0.0000015,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-16k": {
|
"gpt-3.5-turbo-16k": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -286,7 +300,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-4-1106-preview": {
|
"azure/gpt-4-1106-preview": {
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
|
@ -295,7 +311,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-4-0613": {
|
"azure/gpt-4-0613": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
|
@ -304,7 +322,8 @@
|
||||||
"input_cost_per_token": 0.00003,
|
"input_cost_per_token": 0.00003,
|
||||||
"output_cost_per_token": 0.00006,
|
"output_cost_per_token": 0.00006,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-4-32k-0613": {
|
"azure/gpt-4-32k-0613": {
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
|
@ -331,7 +350,8 @@
|
||||||
"input_cost_per_token": 0.00003,
|
"input_cost_per_token": 0.00003,
|
||||||
"output_cost_per_token": 0.00006,
|
"output_cost_per_token": 0.00006,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-4-turbo": {
|
"azure/gpt-4-turbo": {
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
|
@ -340,7 +360,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-4-turbo-vision-preview": {
|
"azure/gpt-4-turbo-vision-preview": {
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
|
@ -358,7 +380,8 @@
|
||||||
"input_cost_per_token": 0.000003,
|
"input_cost_per_token": 0.000003,
|
||||||
"output_cost_per_token": 0.000004,
|
"output_cost_per_token": 0.000004,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-35-turbo-1106": {
|
"azure/gpt-35-turbo-1106": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
@ -367,7 +390,20 @@
|
||||||
"input_cost_per_token": 0.0000015,
|
"input_cost_per_token": 0.0000015,
|
||||||
"output_cost_per_token": 0.000002,
|
"output_cost_per_token": 0.000002,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
|
},
|
||||||
|
"azure/gpt-35-turbo-0125": {
|
||||||
|
"max_tokens": 16384,
|
||||||
|
"max_input_tokens": 16384,
|
||||||
|
"max_output_tokens": 4096,
|
||||||
|
"input_cost_per_token": 0.0000005,
|
||||||
|
"output_cost_per_token": 0.0000015,
|
||||||
|
"litellm_provider": "azure",
|
||||||
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-35-turbo-16k": {
|
"azure/gpt-35-turbo-16k": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -385,7 +421,8 @@
|
||||||
"input_cost_per_token": 0.0000015,
|
"input_cost_per_token": 0.0000015,
|
||||||
"output_cost_per_token": 0.000002,
|
"output_cost_per_token": 0.000002,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/ada": {
|
"azure/ada": {
|
||||||
"max_tokens": 8191,
|
"max_tokens": 8191,
|
||||||
|
@ -514,11 +551,12 @@
|
||||||
"mode": "chat"
|
"mode": "chat"
|
||||||
},
|
},
|
||||||
"mistral/mistral-large-latest": {
|
"mistral/mistral-large-latest": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 32000,
|
||||||
"input_cost_per_token": 0.000008,
|
"input_cost_per_token": 0.000008,
|
||||||
"output_cost_per_token": 0.000024,
|
"output_cost_per_token": 0.000024,
|
||||||
"litellm_provider": "mistral",
|
"litellm_provider": "mistral",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"mistral/mistral-embed": {
|
"mistral/mistral-embed": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
|
@ -676,7 +714,8 @@
|
||||||
"input_cost_per_token": 0.00000025,
|
"input_cost_per_token": 0.00000025,
|
||||||
"output_cost_per_token": 0.0000005,
|
"output_cost_per_token": 0.0000005,
|
||||||
"litellm_provider": "vertex_ai-language-models",
|
"litellm_provider": "vertex_ai-language-models",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"gemini-1.5-pro": {
|
"gemini-1.5-pro": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
|
@ -687,6 +726,15 @@
|
||||||
"litellm_provider": "vertex_ai-language-models",
|
"litellm_provider": "vertex_ai-language-models",
|
||||||
"mode": "chat"
|
"mode": "chat"
|
||||||
},
|
},
|
||||||
|
"gemini-1.5-pro-preview-0215": {
|
||||||
|
"max_tokens": 8192,
|
||||||
|
"max_input_tokens": 1000000,
|
||||||
|
"max_output_tokens": 8192,
|
||||||
|
"input_cost_per_token": 0,
|
||||||
|
"output_cost_per_token": 0,
|
||||||
|
"litellm_provider": "vertex_ai-language-models",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
"gemini-pro-vision": {
|
"gemini-pro-vision": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
"max_output_tokens": 2048,
|
"max_output_tokens": 2048,
|
||||||
|
@ -1729,6 +1777,23 @@
|
||||||
"output_cost_per_token": 0.0000009,
|
"output_cost_per_token": 0.0000009,
|
||||||
"litellm_provider": "together_ai"
|
"litellm_provider": "together_ai"
|
||||||
},
|
},
|
||||||
|
"together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
|
||||||
|
"input_cost_per_token": 0.0000006,
|
||||||
|
"output_cost_per_token": 0.0000006,
|
||||||
|
"litellm_provider": "together_ai",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
|
},
|
||||||
|
"together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
|
||||||
|
"litellm_provider": "together_ai",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
|
},
|
||||||
|
"together_ai/togethercomputer/CodeLlama-34b-Instruct": {
|
||||||
|
"litellm_provider": "together_ai",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
|
},
|
||||||
"ollama/llama2": {
|
"ollama/llama2": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
"input_cost_per_token": 0.0,
|
"input_cost_per_token": 0.0,
|
||||||
|
@ -1981,7 +2046,16 @@
|
||||||
"input_cost_per_token": 0.00000015,
|
"input_cost_per_token": 0.00000015,
|
||||||
"output_cost_per_token": 0.00000015,
|
"output_cost_per_token": 0.00000015,
|
||||||
"litellm_provider": "anyscale",
|
"litellm_provider": "anyscale",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
|
},
|
||||||
|
"anyscale/Mixtral-8x7B-Instruct-v0.1": {
|
||||||
|
"max_tokens": 16384,
|
||||||
|
"input_cost_per_token": 0.00000015,
|
||||||
|
"output_cost_per_token": 0.00000015,
|
||||||
|
"litellm_provider": "anyscale",
|
||||||
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"anyscale/HuggingFaceH4/zephyr-7b-beta": {
|
"anyscale/HuggingFaceH4/zephyr-7b-beta": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
||||||
|
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
|
|
@ -0,0 +1 @@
|
||||||
|
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()
|
66
litellm/proxy/custom_callbacks.py
Normal file
66
litellm/proxy/custom_callbacks.py
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
|
||||||
|
# This file includes the custom callbacks for LiteLLM Proxy
|
||||||
|
# Once defined, these can be passed in proxy_config.yaml
|
||||||
|
class MyCustomHandler(CustomLogger):
|
||||||
|
def log_pre_api_call(self, model, messages, kwargs):
|
||||||
|
print(f"Pre-API Call") # noqa
|
||||||
|
|
||||||
|
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
|
||||||
|
print(f"Post-API Call") # noqa
|
||||||
|
|
||||||
|
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
|
print(f"On Stream") # noqa
|
||||||
|
|
||||||
|
def log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
|
print("On Success") # noqa
|
||||||
|
|
||||||
|
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
|
print(f"On Failure") # noqa
|
||||||
|
|
||||||
|
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
|
print(f"ishaan async_log_success_event") # noqa
|
||||||
|
# log: key, user, model, prompt, response, tokens, cost
|
||||||
|
# Access kwargs passed to litellm.completion()
|
||||||
|
model = kwargs.get("model", None)
|
||||||
|
messages = kwargs.get("messages", None)
|
||||||
|
user = kwargs.get("user", None)
|
||||||
|
|
||||||
|
# Access litellm_params passed to litellm.completion(), example access `metadata`
|
||||||
|
litellm_params = kwargs.get("litellm_params", {})
|
||||||
|
metadata = litellm_params.get(
|
||||||
|
"metadata", {}
|
||||||
|
) # headers passed to LiteLLM proxy, can be found here
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
|
try:
|
||||||
|
print(f"On Async Failure !") # noqa
|
||||||
|
print("\nkwargs", kwargs) # noqa
|
||||||
|
# Access kwargs passed to litellm.completion()
|
||||||
|
model = kwargs.get("model", None)
|
||||||
|
messages = kwargs.get("messages", None)
|
||||||
|
user = kwargs.get("user", None)
|
||||||
|
|
||||||
|
# Access litellm_params passed to litellm.completion(), example access `metadata`
|
||||||
|
litellm_params = kwargs.get("litellm_params", {})
|
||||||
|
metadata = litellm_params.get(
|
||||||
|
"metadata", {}
|
||||||
|
) # headers passed to LiteLLM proxy, can be found here
|
||||||
|
|
||||||
|
# Acess Exceptions & Traceback
|
||||||
|
exception_event = kwargs.get("exception", None)
|
||||||
|
traceback_event = kwargs.get("traceback_exception", None)
|
||||||
|
|
||||||
|
# Calculate cost using litellm.completion_cost()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Exception: {e}") # noqa
|
||||||
|
|
||||||
|
|
||||||
|
proxy_handler_instance = MyCustomHandler()
|
||||||
|
|
||||||
|
# Set litellm.callbacks = [proxy_handler_instance] on the proxy
|
||||||
|
# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy
|
|
@ -45,7 +45,7 @@ litellm_settings:
|
||||||
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
|
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
|
||||||
success_callback: ['langfuse']
|
success_callback: ['langfuse']
|
||||||
# setting callback class
|
# setting callback class
|
||||||
# callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
|
callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
|
||||||
|
|
||||||
general_settings:
|
general_settings:
|
||||||
master_key: sk-1234
|
master_key: sk-1234
|
||||||
|
|
|
@ -240,6 +240,8 @@ health_check_results = {}
|
||||||
queue: List = []
|
queue: List = []
|
||||||
litellm_proxy_budget_name = "litellm-proxy-budget"
|
litellm_proxy_budget_name = "litellm-proxy-budget"
|
||||||
ui_access_mode: Literal["admin", "all"] = "all"
|
ui_access_mode: Literal["admin", "all"] = "all"
|
||||||
|
proxy_budget_rescheduler_min_time = 597
|
||||||
|
proxy_budget_rescheduler_max_time = 605
|
||||||
### INITIALIZE GLOBAL LOGGING OBJECT ###
|
### INITIALIZE GLOBAL LOGGING OBJECT ###
|
||||||
proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
|
proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
|
||||||
### REDIS QUEUE ###
|
### REDIS QUEUE ###
|
||||||
|
@ -1407,7 +1409,7 @@ class ProxyConfig:
|
||||||
"""
|
"""
|
||||||
Load config values into proxy global state
|
Load config values into proxy global state
|
||||||
"""
|
"""
|
||||||
global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, ui_access_mode
|
global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time, ui_access_mode
|
||||||
|
|
||||||
# Load existing config
|
# Load existing config
|
||||||
config = await self.get_config(config_file_path=config_file_path)
|
config = await self.get_config(config_file_path=config_file_path)
|
||||||
|
@ -1718,6 +1720,13 @@ class ProxyConfig:
|
||||||
ui_access_mode = general_settings.get(
|
ui_access_mode = general_settings.get(
|
||||||
"ui_access_mode", "all"
|
"ui_access_mode", "all"
|
||||||
) # can be either ["admin_only" or "all"]
|
) # can be either ["admin_only" or "all"]
|
||||||
|
## BUDGET RESCHEDULER ##
|
||||||
|
proxy_budget_rescheduler_min_time = general_settings.get(
|
||||||
|
"proxy_budget_rescheduler_min_time", proxy_budget_rescheduler_min_time
|
||||||
|
)
|
||||||
|
proxy_budget_rescheduler_max_time = general_settings.get(
|
||||||
|
"proxy_budget_rescheduler_max_time", proxy_budget_rescheduler_max_time
|
||||||
|
)
|
||||||
### BACKGROUND HEALTH CHECKS ###
|
### BACKGROUND HEALTH CHECKS ###
|
||||||
# Enable background health checks
|
# Enable background health checks
|
||||||
use_background_health_checks = general_settings.get(
|
use_background_health_checks = general_settings.get(
|
||||||
|
@ -2120,10 +2129,9 @@ async def async_data_generator(response, user_api_key_dict):
|
||||||
try:
|
try:
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
async for chunk in response:
|
async for chunk in response:
|
||||||
verbose_proxy_logger.debug(f"returned chunk: {chunk}")
|
chunk = chunk.model_dump_json(exclude_none=True)
|
||||||
assert isinstance(chunk, litellm.ModelResponse)
|
|
||||||
try:
|
try:
|
||||||
yield f"data: {json.dumps(chunk.model_dump(exclude_none=True))}\n\n"
|
yield f"data: {chunk}\n\n"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
yield f"data: {str(e)}\n\n"
|
yield f"data: {str(e)}\n\n"
|
||||||
|
|
||||||
|
@ -2202,7 +2210,7 @@ def parse_cache_control(cache_control):
|
||||||
|
|
||||||
@router.on_event("startup")
|
@router.on_event("startup")
|
||||||
async def startup_event():
|
async def startup_event():
|
||||||
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings
|
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
|
||||||
import json
|
import json
|
||||||
|
|
||||||
### LOAD MASTER KEY ###
|
### LOAD MASTER KEY ###
|
||||||
|
@ -2307,13 +2315,12 @@ async def startup_event():
|
||||||
### CHECK IF VIEW EXISTS ###
|
### CHECK IF VIEW EXISTS ###
|
||||||
if prisma_client is not None:
|
if prisma_client is not None:
|
||||||
create_view_response = await prisma_client.check_view_exists()
|
create_view_response = await prisma_client.check_view_exists()
|
||||||
print(f"create_view_response: {create_view_response}") # noqa
|
|
||||||
|
|
||||||
### START BUDGET SCHEDULER ###
|
### START BUDGET SCHEDULER ###
|
||||||
if prisma_client is not None:
|
if prisma_client is not None:
|
||||||
scheduler = AsyncIOScheduler()
|
scheduler = AsyncIOScheduler()
|
||||||
interval = random.randint(
|
interval = random.randint(
|
||||||
597, 605
|
proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
|
||||||
) # random interval, so multiple workers avoid resetting budget at the same time
|
) # random interval, so multiple workers avoid resetting budget at the same time
|
||||||
scheduler.add_job(
|
scheduler.add_job(
|
||||||
reset_budget, "interval", seconds=interval, args=[prisma_client]
|
reset_budget, "interval", seconds=interval, args=[prisma_client]
|
||||||
|
@ -3780,7 +3787,7 @@ async def view_spend_tags(
|
||||||
|
|
||||||
@router.get(
|
@router.get(
|
||||||
"/spend/logs",
|
"/spend/logs",
|
||||||
tags=["budget & spend Tracking"],
|
tags=["Budget & Spend Tracking"],
|
||||||
dependencies=[Depends(user_api_key_auth)],
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
responses={
|
responses={
|
||||||
200: {"model": List[LiteLLM_SpendLogs]},
|
200: {"model": List[LiteLLM_SpendLogs]},
|
||||||
|
@ -3839,13 +3846,55 @@ async def view_spend_logs(
|
||||||
# gettting spend logs from clickhouse
|
# gettting spend logs from clickhouse
|
||||||
from litellm.proxy.enterprise.utils import view_spend_logs_from_clickhouse
|
from litellm.proxy.enterprise.utils import view_spend_logs_from_clickhouse
|
||||||
|
|
||||||
return await view_spend_logs_from_clickhouse(
|
daily_metrics = await view_daily_metrics(
|
||||||
api_key=api_key,
|
|
||||||
user_id=user_id,
|
|
||||||
request_id=request_id,
|
|
||||||
start_date=start_date,
|
start_date=start_date,
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# get the top api keys across all daily_metrics
|
||||||
|
top_api_keys = {} # type: ignore
|
||||||
|
|
||||||
|
# make this compatible with the admin UI
|
||||||
|
for response in daily_metrics.get("daily_spend", {}):
|
||||||
|
response["startTime"] = response["day"]
|
||||||
|
response["spend"] = response["daily_spend"]
|
||||||
|
response["models"] = response["spend_per_model"]
|
||||||
|
response["users"] = {"ishaan": 0.0}
|
||||||
|
spend_per_api_key = response["spend_per_api_key"]
|
||||||
|
|
||||||
|
# insert spend_per_api_key key, values in response
|
||||||
|
for key, value in spend_per_api_key.items():
|
||||||
|
response[key] = value
|
||||||
|
top_api_keys[key] = top_api_keys.get(key, 0.0) + value
|
||||||
|
|
||||||
|
del response["day"]
|
||||||
|
del response["daily_spend"]
|
||||||
|
del response["spend_per_model"]
|
||||||
|
del response["spend_per_api_key"]
|
||||||
|
|
||||||
|
# get top 5 api keys
|
||||||
|
top_api_keys = sorted(top_api_keys.items(), key=lambda x: x[1], reverse=True) # type: ignore
|
||||||
|
top_api_keys = top_api_keys[:5] # type: ignore
|
||||||
|
top_api_keys = dict(top_api_keys) # type: ignore
|
||||||
|
"""
|
||||||
|
set it like this
|
||||||
|
{
|
||||||
|
"key" : key,
|
||||||
|
"spend:" : spend
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
# we need this to show on the Admin UI
|
||||||
|
response_keys = []
|
||||||
|
for key in top_api_keys.items():
|
||||||
|
response_keys.append(
|
||||||
|
{
|
||||||
|
"key": key[0],
|
||||||
|
"spend": key[1],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
daily_metrics["top_api_keys"] = response_keys
|
||||||
|
|
||||||
|
return daily_metrics
|
||||||
global prisma_client
|
global prisma_client
|
||||||
try:
|
try:
|
||||||
verbose_proxy_logger.debug("inside view_spend_logs")
|
verbose_proxy_logger.debug("inside view_spend_logs")
|
||||||
|
@ -3998,6 +4047,142 @@ async def view_spend_logs(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/global/spend/logs",
|
||||||
|
tags=["Budget & Spend Tracking"],
|
||||||
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
|
)
|
||||||
|
async def global_spend_logs():
|
||||||
|
"""
|
||||||
|
[BETA] This is a beta endpoint. It will change.
|
||||||
|
|
||||||
|
Use this to get global spend (spend per day for last 30d). Admin-only endpoint
|
||||||
|
|
||||||
|
More efficient implementation of /spend/logs, by creating a view over the spend logs table.
|
||||||
|
"""
|
||||||
|
global prisma_client
|
||||||
|
|
||||||
|
sql_query = """SELECT * FROM "MonthlyGlobalSpend";"""
|
||||||
|
|
||||||
|
response = await prisma_client.db.query_raw(query=sql_query)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/global/spend/keys",
|
||||||
|
tags=["Budget & Spend Tracking"],
|
||||||
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
|
)
|
||||||
|
async def global_spend_keys(
|
||||||
|
limit: int = fastapi.Query(
|
||||||
|
default=None,
|
||||||
|
description="Number of keys to get. Will return Top 'n' keys.",
|
||||||
|
)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
[BETA] This is a beta endpoint. It will change.
|
||||||
|
|
||||||
|
Use this to get the top 'n' keys with the highest spend, ordered by spend.
|
||||||
|
"""
|
||||||
|
global prisma_client
|
||||||
|
|
||||||
|
if prisma_client is None:
|
||||||
|
raise HTTPException(status_code=500, detail={"error": "No db connected"})
|
||||||
|
sql_query = f"""SELECT * FROM "Last30dKeysBySpend" LIMIT {limit};"""
|
||||||
|
|
||||||
|
response = await prisma_client.db.query_raw(query=sql_query)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/global/spend/models",
|
||||||
|
tags=["Budget & Spend Tracking"],
|
||||||
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
|
)
|
||||||
|
async def global_spend_models(
|
||||||
|
limit: int = fastapi.Query(
|
||||||
|
default=None,
|
||||||
|
description="Number of models to get. Will return Top 'n' models.",
|
||||||
|
)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
[BETA] This is a beta endpoint. It will change.
|
||||||
|
|
||||||
|
Use this to get the top 'n' keys with the highest spend, ordered by spend.
|
||||||
|
"""
|
||||||
|
global prisma_client
|
||||||
|
|
||||||
|
if prisma_client is None:
|
||||||
|
raise HTTPException(status_code=500, detail={"error": "No db connected"})
|
||||||
|
|
||||||
|
sql_query = f"""SELECT * FROM "Last30dModelsBySpend" LIMIT {limit};"""
|
||||||
|
|
||||||
|
response = await prisma_client.db.query_raw(query=sql_query)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/daily_metrics",
|
||||||
|
summary="Get daily spend metrics",
|
||||||
|
tags=["budget & spend Tracking"],
|
||||||
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
|
)
|
||||||
|
async def view_daily_metrics(
|
||||||
|
start_date: Optional[str] = fastapi.Query(
|
||||||
|
default=None,
|
||||||
|
description="Time from which to start viewing key spend",
|
||||||
|
),
|
||||||
|
end_date: Optional[str] = fastapi.Query(
|
||||||
|
default=None,
|
||||||
|
description="Time till which to view key spend",
|
||||||
|
),
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
[BETA] This is a beta endpoint. It might change without notice.
|
||||||
|
|
||||||
|
Please give feedback - https://github.com/BerriAI/litellm/issues
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if os.getenv("CLICKHOUSE_HOST") is not None:
|
||||||
|
# gettting spend logs from clickhouse
|
||||||
|
from litellm.integrations import clickhouse
|
||||||
|
|
||||||
|
return clickhouse.build_daily_metrics()
|
||||||
|
|
||||||
|
# create a response object
|
||||||
|
"""
|
||||||
|
{
|
||||||
|
"date": "2022-01-01",
|
||||||
|
"spend": 0.0,
|
||||||
|
"users": {},
|
||||||
|
"models": {},
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
else:
|
||||||
|
raise Exception(
|
||||||
|
"Clickhouse: Clickhouse host not set. Required for viewing /daily/metrics"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
if isinstance(e, HTTPException):
|
||||||
|
raise ProxyException(
|
||||||
|
message=getattr(e, "detail", f"/spend/logs Error({str(e)})"),
|
||||||
|
type="internal_error",
|
||||||
|
param=getattr(e, "param", "None"),
|
||||||
|
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
|
||||||
|
)
|
||||||
|
elif isinstance(e, ProxyException):
|
||||||
|
raise e
|
||||||
|
raise ProxyException(
|
||||||
|
message="/spend/logs Error" + str(e),
|
||||||
|
type="internal_error",
|
||||||
|
param=getattr(e, "param", "None"),
|
||||||
|
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
#### USER MANAGEMENT ####
|
#### USER MANAGEMENT ####
|
||||||
@router.post(
|
@router.post(
|
||||||
"/user/new",
|
"/user/new",
|
||||||
|
|
|
@ -489,18 +489,20 @@ class PrismaClient:
|
||||||
)
|
)
|
||||||
async def check_view_exists(self):
|
async def check_view_exists(self):
|
||||||
"""
|
"""
|
||||||
Checks if the LiteLLM_VerificationTokenView exists in the user's db.
|
Checks if the LiteLLM_VerificationTokenView and MonthlyGlobalSpend exists in the user's db.
|
||||||
|
|
||||||
This is used for getting the token + team data in user_api_key_auth
|
LiteLLM_VerificationTokenView: This view is used for getting the token + team data in user_api_key_auth
|
||||||
|
|
||||||
|
MonthlyGlobalSpend: This view is used for the admin view to see global spend for this month
|
||||||
|
|
||||||
If the view doesn't exist, one will be created.
|
If the view doesn't exist, one will be created.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Try to select one row from the view
|
# Try to select one row from the view
|
||||||
await self.db.execute_raw(
|
await self.db.query_raw(
|
||||||
"""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1"""
|
"""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1"""
|
||||||
)
|
)
|
||||||
return "LiteLLM_VerificationTokenView Exists!"
|
print("LiteLLM_VerificationTokenView Exists!") # noqa
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# If an error occurs, the view does not exist, so create it
|
# If an error occurs, the view does not exist, so create it
|
||||||
value = await self.health_check()
|
value = await self.health_check()
|
||||||
|
@ -518,7 +520,29 @@ class PrismaClient:
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
return "LiteLLM_VerificationTokenView Created!"
|
print("LiteLLM_VerificationTokenView Created!") # noqa
|
||||||
|
|
||||||
|
try:
|
||||||
|
await self.db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
|
||||||
|
print("MonthlyGlobalSpend Exists!") # noqa
|
||||||
|
except Exception as e:
|
||||||
|
sql_query = """
|
||||||
|
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
|
||||||
|
SELECT
|
||||||
|
DATE("startTime") AS date,
|
||||||
|
SUM("spend") AS spend
|
||||||
|
FROM
|
||||||
|
"LiteLLM_SpendLogs"
|
||||||
|
WHERE
|
||||||
|
"startTime" >= (CURRENT_DATE - INTERVAL '30 days')
|
||||||
|
GROUP BY
|
||||||
|
DATE("startTime");
|
||||||
|
"""
|
||||||
|
await self.db.execute_raw(query=sql_query)
|
||||||
|
|
||||||
|
print("MonthlyGlobalSpend Created!") # noqa
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
@backoff.on_exception(
|
@backoff.on_exception(
|
||||||
backoff.expo,
|
backoff.expo,
|
||||||
|
|
|
@ -1,253 +1,254 @@
|
||||||
import sys
|
## @pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
import os
|
# import sys
|
||||||
import io, asyncio
|
# import os
|
||||||
|
# import io, asyncio
|
||||||
|
|
||||||
# import logging
|
# # import logging
|
||||||
# logging.basicConfig(level=logging.DEBUG)
|
# # logging.basicConfig(level=logging.DEBUG)
|
||||||
sys.path.insert(0, os.path.abspath("../.."))
|
# sys.path.insert(0, os.path.abspath("../.."))
|
||||||
|
|
||||||
from litellm import completion
|
# from litellm import completion
|
||||||
import litellm
|
# import litellm
|
||||||
|
|
||||||
litellm.num_retries = 3
|
# litellm.num_retries = 3
|
||||||
|
|
||||||
import time, random
|
# import time, random
|
||||||
import pytest
|
# import pytest
|
||||||
|
|
||||||
|
|
||||||
def test_s3_logging():
|
# def test_s3_logging():
|
||||||
# all s3 requests need to be in one test function
|
# # all s3 requests need to be in one test function
|
||||||
# since we are modifying stdout, and pytests runs tests in parallel
|
# # since we are modifying stdout, and pytests runs tests in parallel
|
||||||
# on circle ci - we only test litellm.acompletion()
|
# # on circle ci - we only test litellm.acompletion()
|
||||||
try:
|
# try:
|
||||||
# redirect stdout to log_file
|
# # redirect stdout to log_file
|
||||||
litellm.cache = litellm.Cache(
|
|
||||||
type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
|
|
||||||
)
|
|
||||||
|
|
||||||
litellm.success_callback = ["s3"]
|
|
||||||
litellm.s3_callback_params = {
|
|
||||||
"s3_bucket_name": "litellm-logs",
|
|
||||||
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
|
|
||||||
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
|
|
||||||
}
|
|
||||||
litellm.set_verbose = True
|
|
||||||
|
|
||||||
print("Testing async s3 logging")
|
|
||||||
|
|
||||||
expected_keys = []
|
|
||||||
|
|
||||||
import time
|
|
||||||
|
|
||||||
curr_time = str(time.time())
|
|
||||||
|
|
||||||
async def _test():
|
|
||||||
return await litellm.acompletion(
|
|
||||||
model="gpt-3.5-turbo",
|
|
||||||
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
|
||||||
max_tokens=10,
|
|
||||||
temperature=0.7,
|
|
||||||
user="ishaan-2",
|
|
||||||
)
|
|
||||||
|
|
||||||
response = asyncio.run(_test())
|
|
||||||
print(f"response: {response}")
|
|
||||||
expected_keys.append(response.id)
|
|
||||||
|
|
||||||
async def _test():
|
|
||||||
return await litellm.acompletion(
|
|
||||||
model="gpt-3.5-turbo",
|
|
||||||
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
|
||||||
max_tokens=10,
|
|
||||||
temperature=0.7,
|
|
||||||
user="ishaan-2",
|
|
||||||
)
|
|
||||||
|
|
||||||
response = asyncio.run(_test())
|
|
||||||
expected_keys.append(response.id)
|
|
||||||
print(f"response: {response}")
|
|
||||||
time.sleep(5) # wait 5s for logs to land
|
|
||||||
|
|
||||||
import boto3
|
|
||||||
|
|
||||||
s3 = boto3.client("s3")
|
|
||||||
bucket_name = "litellm-logs"
|
|
||||||
# List objects in the bucket
|
|
||||||
response = s3.list_objects(Bucket=bucket_name)
|
|
||||||
|
|
||||||
# Sort the objects based on the LastModified timestamp
|
|
||||||
objects = sorted(
|
|
||||||
response["Contents"], key=lambda x: x["LastModified"], reverse=True
|
|
||||||
)
|
|
||||||
# Get the keys of the most recent objects
|
|
||||||
most_recent_keys = [obj["Key"] for obj in objects]
|
|
||||||
print(most_recent_keys)
|
|
||||||
# for each key, get the part before "-" as the key. Do it safely
|
|
||||||
cleaned_keys = []
|
|
||||||
for key in most_recent_keys:
|
|
||||||
split_key = key.split("_")
|
|
||||||
if len(split_key) < 2:
|
|
||||||
continue
|
|
||||||
cleaned_keys.append(split_key[1])
|
|
||||||
print("\n most recent keys", most_recent_keys)
|
|
||||||
print("\n cleaned keys", cleaned_keys)
|
|
||||||
print("\n Expected keys: ", expected_keys)
|
|
||||||
matches = 0
|
|
||||||
for key in expected_keys:
|
|
||||||
key += ".json"
|
|
||||||
assert key in cleaned_keys
|
|
||||||
|
|
||||||
if key in cleaned_keys:
|
|
||||||
matches += 1
|
|
||||||
# remove the match key
|
|
||||||
cleaned_keys.remove(key)
|
|
||||||
# this asserts we log, the first request + the 2nd cached request
|
|
||||||
print("we had two matches ! passed ", matches)
|
|
||||||
assert matches == 2
|
|
||||||
try:
|
|
||||||
# cleanup s3 bucket in test
|
|
||||||
for key in most_recent_keys:
|
|
||||||
s3.delete_object(Bucket=bucket_name, Key=key)
|
|
||||||
except:
|
|
||||||
# don't let cleanup fail a test
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
pytest.fail(f"An exception occurred - {e}")
|
|
||||||
finally:
|
|
||||||
# post, close log file and verify
|
|
||||||
# Reset stdout to the original value
|
|
||||||
print("Passed! Testing async s3 logging")
|
|
||||||
|
|
||||||
|
|
||||||
# test_s3_logging()
|
|
||||||
|
|
||||||
|
|
||||||
def test_s3_logging_async():
|
|
||||||
# this tests time added to make s3 logging calls, vs just acompletion calls
|
|
||||||
try:
|
|
||||||
litellm.set_verbose = True
|
|
||||||
# Make 5 calls with an empty success_callback
|
|
||||||
litellm.success_callback = []
|
|
||||||
start_time_empty_callback = asyncio.run(make_async_calls())
|
|
||||||
print("done with no callback test")
|
|
||||||
|
|
||||||
print("starting s3 logging load test")
|
|
||||||
# Make 5 calls with success_callback set to "langfuse"
|
|
||||||
litellm.success_callback = ["s3"]
|
|
||||||
litellm.s3_callback_params = {
|
|
||||||
"s3_bucket_name": "litellm-logs",
|
|
||||||
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
|
|
||||||
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
|
|
||||||
}
|
|
||||||
start_time_s3 = asyncio.run(make_async_calls())
|
|
||||||
print("done with s3 test")
|
|
||||||
|
|
||||||
# Compare the time for both scenarios
|
|
||||||
print(f"Time taken with success_callback='s3': {start_time_s3}")
|
|
||||||
print(f"Time taken with empty success_callback: {start_time_empty_callback}")
|
|
||||||
|
|
||||||
# assert the diff is not more than 1 second
|
|
||||||
assert abs(start_time_s3 - start_time_empty_callback) < 1
|
|
||||||
|
|
||||||
except litellm.Timeout as e:
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
pytest.fail(f"An exception occurred - {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def make_async_calls():
|
|
||||||
tasks = []
|
|
||||||
for _ in range(5):
|
|
||||||
task = asyncio.create_task(
|
|
||||||
litellm.acompletion(
|
|
||||||
model="azure/chatgpt-v-2",
|
|
||||||
messages=[{"role": "user", "content": "This is a test"}],
|
|
||||||
max_tokens=5,
|
|
||||||
temperature=0.7,
|
|
||||||
timeout=5,
|
|
||||||
user="langfuse_latency_test_user",
|
|
||||||
mock_response="It's simple to use and easy to get started",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
tasks.append(task)
|
|
||||||
|
|
||||||
# Measure the start time before running the tasks
|
|
||||||
start_time = asyncio.get_event_loop().time()
|
|
||||||
|
|
||||||
# Wait for all tasks to complete
|
|
||||||
responses = await asyncio.gather(*tasks)
|
|
||||||
|
|
||||||
# Print the responses when tasks return
|
|
||||||
for idx, response in enumerate(responses):
|
|
||||||
print(f"Response from Task {idx + 1}: {response}")
|
|
||||||
|
|
||||||
# Calculate the total time taken
|
|
||||||
total_time = asyncio.get_event_loop().time() - start_time
|
|
||||||
|
|
||||||
return total_time
|
|
||||||
|
|
||||||
|
|
||||||
def test_s3_logging_r2():
|
|
||||||
# all s3 requests need to be in one test function
|
|
||||||
# since we are modifying stdout, and pytests runs tests in parallel
|
|
||||||
# on circle ci - we only test litellm.acompletion()
|
|
||||||
try:
|
|
||||||
# redirect stdout to log_file
|
|
||||||
# litellm.cache = litellm.Cache(
|
# litellm.cache = litellm.Cache(
|
||||||
# type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
|
# type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
|
||||||
# )
|
# )
|
||||||
litellm.set_verbose = True
|
|
||||||
from litellm._logging import verbose_logger
|
|
||||||
import logging
|
|
||||||
|
|
||||||
verbose_logger.setLevel(level=logging.DEBUG)
|
# litellm.success_callback = ["s3"]
|
||||||
|
# litellm.s3_callback_params = {
|
||||||
|
# "s3_bucket_name": "litellm-logs",
|
||||||
|
# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
|
||||||
|
# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
|
||||||
|
# }
|
||||||
|
# litellm.set_verbose = True
|
||||||
|
|
||||||
litellm.success_callback = ["s3"]
|
# print("Testing async s3 logging")
|
||||||
litellm.s3_callback_params = {
|
|
||||||
"s3_bucket_name": "litellm-r2-bucket",
|
|
||||||
"s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
|
|
||||||
"s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
|
|
||||||
"s3_endpoint_url": "os.environ/R2_S3_URL",
|
|
||||||
"s3_region_name": "os.environ/R2_S3_REGION_NAME",
|
|
||||||
}
|
|
||||||
print("Testing async s3 logging")
|
|
||||||
|
|
||||||
expected_keys = []
|
# expected_keys = []
|
||||||
|
|
||||||
import time
|
# import time
|
||||||
|
|
||||||
curr_time = str(time.time())
|
# curr_time = str(time.time())
|
||||||
|
|
||||||
async def _test():
|
# async def _test():
|
||||||
return await litellm.acompletion(
|
# return await litellm.acompletion(
|
||||||
model="gpt-3.5-turbo",
|
# model="gpt-3.5-turbo",
|
||||||
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
||||||
max_tokens=10,
|
# max_tokens=10,
|
||||||
temperature=0.7,
|
# temperature=0.7,
|
||||||
user="ishaan-2",
|
# user="ishaan-2",
|
||||||
)
|
# )
|
||||||
|
|
||||||
response = asyncio.run(_test())
|
# response = asyncio.run(_test())
|
||||||
print(f"response: {response}")
|
# print(f"response: {response}")
|
||||||
expected_keys.append(response.id)
|
# expected_keys.append(response.id)
|
||||||
|
|
||||||
import boto3
|
# async def _test():
|
||||||
|
# return await litellm.acompletion(
|
||||||
|
# model="gpt-3.5-turbo",
|
||||||
|
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
||||||
|
# max_tokens=10,
|
||||||
|
# temperature=0.7,
|
||||||
|
# user="ishaan-2",
|
||||||
|
# )
|
||||||
|
|
||||||
s3 = boto3.client(
|
# response = asyncio.run(_test())
|
||||||
"s3",
|
# expected_keys.append(response.id)
|
||||||
endpoint_url=os.getenv("R2_S3_URL"),
|
# print(f"response: {response}")
|
||||||
region_name=os.getenv("R2_S3_REGION_NAME"),
|
# time.sleep(5) # wait 5s for logs to land
|
||||||
aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
|
|
||||||
aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
|
|
||||||
)
|
|
||||||
|
|
||||||
bucket_name = "litellm-r2-bucket"
|
# import boto3
|
||||||
# List objects in the bucket
|
|
||||||
response = s3.list_objects(Bucket=bucket_name)
|
|
||||||
|
|
||||||
except Exception as e:
|
# s3 = boto3.client("s3")
|
||||||
pytest.fail(f"An exception occurred - {e}")
|
# bucket_name = "litellm-logs"
|
||||||
finally:
|
# # List objects in the bucket
|
||||||
# post, close log file and verify
|
# response = s3.list_objects(Bucket=bucket_name)
|
||||||
# Reset stdout to the original value
|
|
||||||
print("Passed! Testing async s3 logging")
|
# # Sort the objects based on the LastModified timestamp
|
||||||
|
# objects = sorted(
|
||||||
|
# response["Contents"], key=lambda x: x["LastModified"], reverse=True
|
||||||
|
# )
|
||||||
|
# # Get the keys of the most recent objects
|
||||||
|
# most_recent_keys = [obj["Key"] for obj in objects]
|
||||||
|
# print(most_recent_keys)
|
||||||
|
# # for each key, get the part before "-" as the key. Do it safely
|
||||||
|
# cleaned_keys = []
|
||||||
|
# for key in most_recent_keys:
|
||||||
|
# split_key = key.split("_")
|
||||||
|
# if len(split_key) < 2:
|
||||||
|
# continue
|
||||||
|
# cleaned_keys.append(split_key[1])
|
||||||
|
# print("\n most recent keys", most_recent_keys)
|
||||||
|
# print("\n cleaned keys", cleaned_keys)
|
||||||
|
# print("\n Expected keys: ", expected_keys)
|
||||||
|
# matches = 0
|
||||||
|
# for key in expected_keys:
|
||||||
|
# key += ".json"
|
||||||
|
# assert key in cleaned_keys
|
||||||
|
|
||||||
|
# if key in cleaned_keys:
|
||||||
|
# matches += 1
|
||||||
|
# # remove the match key
|
||||||
|
# cleaned_keys.remove(key)
|
||||||
|
# # this asserts we log, the first request + the 2nd cached request
|
||||||
|
# print("we had two matches ! passed ", matches)
|
||||||
|
# assert matches == 2
|
||||||
|
# try:
|
||||||
|
# # cleanup s3 bucket in test
|
||||||
|
# for key in most_recent_keys:
|
||||||
|
# s3.delete_object(Bucket=bucket_name, Key=key)
|
||||||
|
# except:
|
||||||
|
# # don't let cleanup fail a test
|
||||||
|
# pass
|
||||||
|
# except Exception as e:
|
||||||
|
# pytest.fail(f"An exception occurred - {e}")
|
||||||
|
# finally:
|
||||||
|
# # post, close log file and verify
|
||||||
|
# # Reset stdout to the original value
|
||||||
|
# print("Passed! Testing async s3 logging")
|
||||||
|
|
||||||
|
|
||||||
|
# # test_s3_logging()
|
||||||
|
|
||||||
|
|
||||||
|
# def test_s3_logging_async():
|
||||||
|
# # this tests time added to make s3 logging calls, vs just acompletion calls
|
||||||
|
# try:
|
||||||
|
# litellm.set_verbose = True
|
||||||
|
# # Make 5 calls with an empty success_callback
|
||||||
|
# litellm.success_callback = []
|
||||||
|
# start_time_empty_callback = asyncio.run(make_async_calls())
|
||||||
|
# print("done with no callback test")
|
||||||
|
|
||||||
|
# print("starting s3 logging load test")
|
||||||
|
# # Make 5 calls with success_callback set to "langfuse"
|
||||||
|
# litellm.success_callback = ["s3"]
|
||||||
|
# litellm.s3_callback_params = {
|
||||||
|
# "s3_bucket_name": "litellm-logs",
|
||||||
|
# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
|
||||||
|
# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
|
||||||
|
# }
|
||||||
|
# start_time_s3 = asyncio.run(make_async_calls())
|
||||||
|
# print("done with s3 test")
|
||||||
|
|
||||||
|
# # Compare the time for both scenarios
|
||||||
|
# print(f"Time taken with success_callback='s3': {start_time_s3}")
|
||||||
|
# print(f"Time taken with empty success_callback: {start_time_empty_callback}")
|
||||||
|
|
||||||
|
# # assert the diff is not more than 1 second
|
||||||
|
# assert abs(start_time_s3 - start_time_empty_callback) < 1
|
||||||
|
|
||||||
|
# except litellm.Timeout as e:
|
||||||
|
# pass
|
||||||
|
# except Exception as e:
|
||||||
|
# pytest.fail(f"An exception occurred - {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# async def make_async_calls():
|
||||||
|
# tasks = []
|
||||||
|
# for _ in range(5):
|
||||||
|
# task = asyncio.create_task(
|
||||||
|
# litellm.acompletion(
|
||||||
|
# model="azure/chatgpt-v-2",
|
||||||
|
# messages=[{"role": "user", "content": "This is a test"}],
|
||||||
|
# max_tokens=5,
|
||||||
|
# temperature=0.7,
|
||||||
|
# timeout=5,
|
||||||
|
# user="langfuse_latency_test_user",
|
||||||
|
# mock_response="It's simple to use and easy to get started",
|
||||||
|
# )
|
||||||
|
# )
|
||||||
|
# tasks.append(task)
|
||||||
|
|
||||||
|
# # Measure the start time before running the tasks
|
||||||
|
# start_time = asyncio.get_event_loop().time()
|
||||||
|
|
||||||
|
# # Wait for all tasks to complete
|
||||||
|
# responses = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
# # Print the responses when tasks return
|
||||||
|
# for idx, response in enumerate(responses):
|
||||||
|
# print(f"Response from Task {idx + 1}: {response}")
|
||||||
|
|
||||||
|
# # Calculate the total time taken
|
||||||
|
# total_time = asyncio.get_event_loop().time() - start_time
|
||||||
|
|
||||||
|
# return total_time
|
||||||
|
|
||||||
|
|
||||||
|
# def test_s3_logging_r2():
|
||||||
|
# # all s3 requests need to be in one test function
|
||||||
|
# # since we are modifying stdout, and pytests runs tests in parallel
|
||||||
|
# # on circle ci - we only test litellm.acompletion()
|
||||||
|
# try:
|
||||||
|
# # redirect stdout to log_file
|
||||||
|
# # litellm.cache = litellm.Cache(
|
||||||
|
# # type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
|
||||||
|
# # )
|
||||||
|
# litellm.set_verbose = True
|
||||||
|
# from litellm._logging import verbose_logger
|
||||||
|
# import logging
|
||||||
|
|
||||||
|
# verbose_logger.setLevel(level=logging.DEBUG)
|
||||||
|
|
||||||
|
# litellm.success_callback = ["s3"]
|
||||||
|
# litellm.s3_callback_params = {
|
||||||
|
# "s3_bucket_name": "litellm-r2-bucket",
|
||||||
|
# "s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
|
||||||
|
# "s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
|
||||||
|
# "s3_endpoint_url": "os.environ/R2_S3_URL",
|
||||||
|
# "s3_region_name": "os.environ/R2_S3_REGION_NAME",
|
||||||
|
# }
|
||||||
|
# print("Testing async s3 logging")
|
||||||
|
|
||||||
|
# expected_keys = []
|
||||||
|
|
||||||
|
# import time
|
||||||
|
|
||||||
|
# curr_time = str(time.time())
|
||||||
|
|
||||||
|
# async def _test():
|
||||||
|
# return await litellm.acompletion(
|
||||||
|
# model="gpt-3.5-turbo",
|
||||||
|
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
|
||||||
|
# max_tokens=10,
|
||||||
|
# temperature=0.7,
|
||||||
|
# user="ishaan-2",
|
||||||
|
# )
|
||||||
|
|
||||||
|
# response = asyncio.run(_test())
|
||||||
|
# print(f"response: {response}")
|
||||||
|
# expected_keys.append(response.id)
|
||||||
|
|
||||||
|
# import boto3
|
||||||
|
|
||||||
|
# s3 = boto3.client(
|
||||||
|
# "s3",
|
||||||
|
# endpoint_url=os.getenv("R2_S3_URL"),
|
||||||
|
# region_name=os.getenv("R2_S3_REGION_NAME"),
|
||||||
|
# aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
|
||||||
|
# aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
|
||||||
|
# )
|
||||||
|
|
||||||
|
# bucket_name = "litellm-r2-bucket"
|
||||||
|
# # List objects in the bucket
|
||||||
|
# response = s3.list_objects(Bucket=bucket_name)
|
||||||
|
|
||||||
|
# except Exception as e:
|
||||||
|
# pytest.fail(f"An exception occurred - {e}")
|
||||||
|
# finally:
|
||||||
|
# # post, close log file and verify
|
||||||
|
# # Reset stdout to the original value
|
||||||
|
# print("Passed! Testing async s3 logging")
|
||||||
|
|
|
@ -130,6 +130,8 @@ def test_vertex_ai():
|
||||||
f"response.choices[0].finish_reason: {response.choices[0].finish_reason}"
|
f"response.choices[0].finish_reason: {response.choices[0].finish_reason}"
|
||||||
)
|
)
|
||||||
assert response.choices[0].finish_reason in litellm._openai_finish_reasons
|
assert response.choices[0].finish_reason in litellm._openai_finish_reasons
|
||||||
|
except litellm.RateLimitError as e:
|
||||||
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
@ -183,6 +185,8 @@ def test_vertex_ai_stream():
|
||||||
assert type(content) == str
|
assert type(content) == str
|
||||||
# pass
|
# pass
|
||||||
assert len(completed_str) > 4
|
assert len(completed_str) > 4
|
||||||
|
except litellm.RateLimitError as e:
|
||||||
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
|
@ -193,16 +193,26 @@ async def test_hf_completion_tgi():
|
||||||
# Add any assertions here to check the response
|
# Add any assertions here to check the response
|
||||||
print(response)
|
print(response)
|
||||||
except litellm.APIError as e:
|
except litellm.APIError as e:
|
||||||
|
print("got an api error")
|
||||||
pass
|
pass
|
||||||
except litellm.Timeout as e:
|
except litellm.Timeout as e:
|
||||||
|
print("got a timeout error")
|
||||||
|
pass
|
||||||
|
except litellm.RateLimitError as e:
|
||||||
|
# this will catch the model is overloaded error
|
||||||
|
print("got a rate limit error")
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
if "Model is overloaded" in str(e):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
# test_get_cloudflare_response_streaming()
|
# test_get_cloudflare_response_streaming()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_completion_sagemaker():
|
async def test_completion_sagemaker():
|
||||||
# litellm.set_verbose=True
|
# litellm.set_verbose=True
|
||||||
|
|
|
@ -1,257 +1,259 @@
|
||||||
import sys, os
|
# @pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
import traceback
|
# import sys, os
|
||||||
from dotenv import load_dotenv
|
# import traceback
|
||||||
|
# from dotenv import load_dotenv
|
||||||
|
|
||||||
load_dotenv()
|
# load_dotenv()
|
||||||
import os, io
|
# import os, io
|
||||||
|
|
||||||
sys.path.insert(
|
# sys.path.insert(
|
||||||
0, os.path.abspath("../..")
|
# 0, os.path.abspath("../..")
|
||||||
) # Adds the parent directory to the system path
|
# ) # Adds the parent directory to the system path
|
||||||
import pytest
|
# import pytest
|
||||||
import litellm
|
# import litellm
|
||||||
from litellm import embedding, completion, completion_cost, Timeout
|
# from litellm import embedding, completion, completion_cost, Timeout
|
||||||
from litellm import RateLimitError
|
# from litellm import RateLimitError
|
||||||
|
|
||||||
# litellm.num_retries = 3
|
# # litellm.num_retries = 3
|
||||||
litellm.cache = None
|
# litellm.cache = None
|
||||||
litellm.success_callback = []
|
# litellm.success_callback = []
|
||||||
user_message = "Write a short poem about the sky"
|
# user_message = "Write a short poem about the sky"
|
||||||
messages = [{"content": user_message, "role": "user"}]
|
# messages = [{"content": user_message, "role": "user"}]
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
# @pytest.fixture(autouse=True)
|
||||||
def reset_callbacks():
|
# def reset_callbacks():
|
||||||
print("\npytest fixture - resetting callbacks")
|
# print("\npytest fixture - resetting callbacks")
|
||||||
litellm.success_callback = []
|
# litellm.success_callback = []
|
||||||
litellm._async_success_callback = []
|
# litellm._async_success_callback = []
|
||||||
litellm.failure_callback = []
|
# litellm.failure_callback = []
|
||||||
litellm.callbacks = []
|
# litellm.callbacks = []
|
||||||
|
|
||||||
|
|
||||||
def test_completion_bedrock_claude_completion_auth():
|
# def test_completion_bedrock_claude_completion_auth():
|
||||||
print("calling bedrock claude completion params auth")
|
# print("calling bedrock claude completion params auth")
|
||||||
import os
|
# import os
|
||||||
|
|
||||||
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||||
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||||
|
|
||||||
os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||||
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||||
os.environ.pop("AWS_REGION_NAME", None)
|
# os.environ.pop("AWS_REGION_NAME", None)
|
||||||
|
|
||||||
try:
|
# try:
|
||||||
response = completion(
|
# response = completion(
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
# model="bedrock/anthropic.claude-instant-v1",
|
||||||
messages=messages,
|
# messages=messages,
|
||||||
max_tokens=10,
|
# max_tokens=10,
|
||||||
temperature=0.1,
|
# temperature=0.1,
|
||||||
aws_access_key_id=aws_access_key_id,
|
# aws_access_key_id=aws_access_key_id,
|
||||||
aws_secret_access_key=aws_secret_access_key,
|
# aws_secret_access_key=aws_secret_access_key,
|
||||||
aws_region_name=aws_region_name,
|
# aws_region_name=aws_region_name,
|
||||||
)
|
# )
|
||||||
# Add any assertions here to check the response
|
# # Add any assertions here to check the response
|
||||||
print(response)
|
# print(response)
|
||||||
|
|
||||||
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||||
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||||
os.environ["AWS_REGION_NAME"] = aws_region_name
|
# os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||||
except RateLimitError:
|
# except RateLimitError:
|
||||||
pass
|
# pass
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
# pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
# test_completion_bedrock_claude_completion_auth()
|
# # test_completion_bedrock_claude_completion_auth()
|
||||||
|
|
||||||
|
|
||||||
def test_completion_bedrock_claude_2_1_completion_auth():
|
# def test_completion_bedrock_claude_2_1_completion_auth():
|
||||||
print("calling bedrock claude 2.1 completion params auth")
|
# print("calling bedrock claude 2.1 completion params auth")
|
||||||
import os
|
# import os
|
||||||
|
|
||||||
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||||
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||||
|
|
||||||
os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||||
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||||
os.environ.pop("AWS_REGION_NAME", None)
|
# os.environ.pop("AWS_REGION_NAME", None)
|
||||||
try:
|
# try:
|
||||||
response = completion(
|
# response = completion(
|
||||||
model="bedrock/anthropic.claude-v2:1",
|
# model="bedrock/anthropic.claude-v2:1",
|
||||||
messages=messages,
|
# messages=messages,
|
||||||
max_tokens=10,
|
# max_tokens=10,
|
||||||
temperature=0.1,
|
# temperature=0.1,
|
||||||
aws_access_key_id=aws_access_key_id,
|
# aws_access_key_id=aws_access_key_id,
|
||||||
aws_secret_access_key=aws_secret_access_key,
|
# aws_secret_access_key=aws_secret_access_key,
|
||||||
aws_region_name=aws_region_name,
|
# aws_region_name=aws_region_name,
|
||||||
)
|
# )
|
||||||
# Add any assertions here to check the response
|
# # Add any assertions here to check the response
|
||||||
print(response)
|
# print(response)
|
||||||
|
|
||||||
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||||
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||||
os.environ["AWS_REGION_NAME"] = aws_region_name
|
# os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||||
except RateLimitError:
|
# except RateLimitError:
|
||||||
pass
|
# pass
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
# pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
# test_completion_bedrock_claude_2_1_completion_auth()
|
# # test_completion_bedrock_claude_2_1_completion_auth()
|
||||||
|
|
||||||
|
|
||||||
def test_completion_bedrock_claude_external_client_auth():
|
# def test_completion_bedrock_claude_external_client_auth():
|
||||||
print("\ncalling bedrock claude external client auth")
|
# print("\ncalling bedrock claude external client auth")
|
||||||
import os
|
# import os
|
||||||
|
|
||||||
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||||
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||||
|
|
||||||
os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||||
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||||
os.environ.pop("AWS_REGION_NAME", None)
|
# os.environ.pop("AWS_REGION_NAME", None)
|
||||||
|
|
||||||
try:
|
# try:
|
||||||
import boto3
|
# import boto3
|
||||||
|
|
||||||
litellm.set_verbose = True
|
# litellm.set_verbose = True
|
||||||
|
|
||||||
bedrock = boto3.client(
|
# bedrock = boto3.client(
|
||||||
service_name="bedrock-runtime",
|
# service_name="bedrock-runtime",
|
||||||
region_name=aws_region_name,
|
# region_name=aws_region_name,
|
||||||
aws_access_key_id=aws_access_key_id,
|
# aws_access_key_id=aws_access_key_id,
|
||||||
aws_secret_access_key=aws_secret_access_key,
|
# aws_secret_access_key=aws_secret_access_key,
|
||||||
endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
|
# endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
|
||||||
)
|
# )
|
||||||
|
|
||||||
response = completion(
|
# response = completion(
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
# model="bedrock/anthropic.claude-instant-v1",
|
||||||
messages=messages,
|
# messages=messages,
|
||||||
max_tokens=10,
|
# max_tokens=10,
|
||||||
temperature=0.1,
|
# temperature=0.1,
|
||||||
aws_bedrock_client=bedrock,
|
# aws_bedrock_client=bedrock,
|
||||||
)
|
# )
|
||||||
# Add any assertions here to check the response
|
# # Add any assertions here to check the response
|
||||||
print(response)
|
# print(response)
|
||||||
|
|
||||||
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||||
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||||
os.environ["AWS_REGION_NAME"] = aws_region_name
|
# os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||||
except RateLimitError:
|
# except RateLimitError:
|
||||||
pass
|
# pass
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
# pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
# test_completion_bedrock_claude_external_client_auth()
|
# # test_completion_bedrock_claude_external_client_auth()
|
||||||
|
|
||||||
|
|
||||||
def test_completion_bedrock_claude_sts_client_auth():
|
# @pytest.mark.skip(reason="Expired token, need to renew")
|
||||||
print("\ncalling bedrock claude external client auth")
|
# def test_completion_bedrock_claude_sts_client_auth():
|
||||||
import os
|
# print("\ncalling bedrock claude external client auth")
|
||||||
|
# import os
|
||||||
|
|
||||||
aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
|
# aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
|
||||||
aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
|
# aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
|
||||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||||
aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
||||||
|
|
||||||
try:
|
# try:
|
||||||
import boto3
|
# import boto3
|
||||||
|
|
||||||
litellm.set_verbose = True
|
# litellm.set_verbose = True
|
||||||
|
|
||||||
response = completion(
|
# response = completion(
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
# model="bedrock/anthropic.claude-instant-v1",
|
||||||
messages=messages,
|
# messages=messages,
|
||||||
max_tokens=10,
|
# max_tokens=10,
|
||||||
temperature=0.1,
|
# temperature=0.1,
|
||||||
aws_region_name=aws_region_name,
|
# aws_region_name=aws_region_name,
|
||||||
aws_access_key_id=aws_access_key_id,
|
# aws_access_key_id=aws_access_key_id,
|
||||||
aws_secret_access_key=aws_secret_access_key,
|
# aws_secret_access_key=aws_secret_access_key,
|
||||||
aws_role_name=aws_role_name,
|
# aws_role_name=aws_role_name,
|
||||||
aws_session_name="my-test-session",
|
# aws_session_name="my-test-session",
|
||||||
)
|
# )
|
||||||
|
|
||||||
response = embedding(
|
# response = embedding(
|
||||||
model="cohere.embed-multilingual-v3",
|
# model="cohere.embed-multilingual-v3",
|
||||||
input=["hello world"],
|
# input=["hello world"],
|
||||||
aws_region_name="us-east-1",
|
# aws_region_name="us-east-1",
|
||||||
aws_access_key_id=aws_access_key_id,
|
# aws_access_key_id=aws_access_key_id,
|
||||||
aws_secret_access_key=aws_secret_access_key,
|
# aws_secret_access_key=aws_secret_access_key,
|
||||||
aws_role_name=aws_role_name,
|
# aws_role_name=aws_role_name,
|
||||||
aws_session_name="my-test-session",
|
# aws_session_name="my-test-session",
|
||||||
)
|
# )
|
||||||
|
|
||||||
response = completion(
|
# response = completion(
|
||||||
model="gpt-3.5-turbo",
|
# model="gpt-3.5-turbo",
|
||||||
messages=messages,
|
# messages=messages,
|
||||||
aws_region_name="us-east-1",
|
# aws_region_name="us-east-1",
|
||||||
aws_access_key_id=aws_access_key_id,
|
# aws_access_key_id=aws_access_key_id,
|
||||||
aws_secret_access_key=aws_secret_access_key,
|
# aws_secret_access_key=aws_secret_access_key,
|
||||||
aws_role_name=aws_role_name,
|
# aws_role_name=aws_role_name,
|
||||||
aws_session_name="my-test-session",
|
# aws_session_name="my-test-session",
|
||||||
)
|
# )
|
||||||
# Add any assertions here to check the response
|
# # Add any assertions here to check the response
|
||||||
print(response)
|
# print(response)
|
||||||
except RateLimitError:
|
# except RateLimitError:
|
||||||
pass
|
# pass
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
# pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
test_completion_bedrock_claude_sts_client_auth()
|
# # test_completion_bedrock_claude_sts_client_auth()
|
||||||
|
|
||||||
|
|
||||||
def test_provisioned_throughput():
|
# def test_provisioned_throughput():
|
||||||
try:
|
# try:
|
||||||
litellm.set_verbose = True
|
# litellm.set_verbose = True
|
||||||
import botocore, json, io
|
# import botocore, json, io
|
||||||
import botocore.session
|
# import botocore.session
|
||||||
from botocore.stub import Stubber
|
# from botocore.stub import Stubber
|
||||||
|
|
||||||
bedrock_client = botocore.session.get_session().create_client(
|
# bedrock_client = botocore.session.get_session().create_client(
|
||||||
"bedrock-runtime", region_name="us-east-1"
|
# "bedrock-runtime", region_name="us-east-1"
|
||||||
)
|
# )
|
||||||
|
|
||||||
expected_params = {
|
# expected_params = {
|
||||||
"accept": "application/json",
|
# "accept": "application/json",
|
||||||
"body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
|
# "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
|
||||||
'"max_tokens_to_sample": 256}',
|
# '"max_tokens_to_sample": 256}',
|
||||||
"contentType": "application/json",
|
# "contentType": "application/json",
|
||||||
"modelId": "provisioned-model-arn",
|
# "modelId": "provisioned-model-arn",
|
||||||
}
|
# }
|
||||||
response_from_bedrock = {
|
# response_from_bedrock = {
|
||||||
"body": io.StringIO(
|
# "body": io.StringIO(
|
||||||
json.dumps(
|
# json.dumps(
|
||||||
{
|
# {
|
||||||
"completion": " Here is a short poem about the sky:",
|
# "completion": " Here is a short poem about the sky:",
|
||||||
"stop_reason": "max_tokens",
|
# "stop_reason": "max_tokens",
|
||||||
"stop": None,
|
# "stop": None,
|
||||||
}
|
# }
|
||||||
)
|
# )
|
||||||
),
|
# ),
|
||||||
"contentType": "contentType",
|
# "contentType": "contentType",
|
||||||
"ResponseMetadata": {"HTTPStatusCode": 200},
|
# "ResponseMetadata": {"HTTPStatusCode": 200},
|
||||||
}
|
# }
|
||||||
|
|
||||||
with Stubber(bedrock_client) as stubber:
|
# with Stubber(bedrock_client) as stubber:
|
||||||
stubber.add_response(
|
# stubber.add_response(
|
||||||
"invoke_model",
|
# "invoke_model",
|
||||||
service_response=response_from_bedrock,
|
# service_response=response_from_bedrock,
|
||||||
expected_params=expected_params,
|
# expected_params=expected_params,
|
||||||
)
|
# )
|
||||||
response = litellm.completion(
|
# response = litellm.completion(
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
# model="bedrock/anthropic.claude-instant-v1",
|
||||||
model_id="provisioned-model-arn",
|
# model_id="provisioned-model-arn",
|
||||||
messages=[{"content": "Hello, how are you?", "role": "user"}],
|
# messages=[{"content": "Hello, how are you?", "role": "user"}],
|
||||||
aws_bedrock_client=bedrock_client,
|
# aws_bedrock_client=bedrock_client,
|
||||||
)
|
# )
|
||||||
print("response stubbed", response)
|
# print("response stubbed", response)
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
# pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
# test_provisioned_throughput()
|
# # test_provisioned_throughput()
|
||||||
|
|
|
@ -546,6 +546,7 @@ def test_redis_cache_acompletion_stream():
|
||||||
# test_redis_cache_acompletion_stream()
|
# test_redis_cache_acompletion_stream()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_redis_cache_acompletion_stream_bedrock():
|
def test_redis_cache_acompletion_stream_bedrock():
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
|
@ -571,7 +572,7 @@ def test_redis_cache_acompletion_stream_bedrock():
|
||||||
async def call1():
|
async def call1():
|
||||||
nonlocal response_1_content
|
nonlocal response_1_content
|
||||||
response1 = await litellm.acompletion(
|
response1 = await litellm.acompletion(
|
||||||
model="bedrock/anthropic.claude-v1",
|
model="bedrock/anthropic.claude-v2",
|
||||||
messages=messages,
|
messages=messages,
|
||||||
max_tokens=40,
|
max_tokens=40,
|
||||||
temperature=1,
|
temperature=1,
|
||||||
|
@ -589,7 +590,7 @@ def test_redis_cache_acompletion_stream_bedrock():
|
||||||
async def call2():
|
async def call2():
|
||||||
nonlocal response_2_content
|
nonlocal response_2_content
|
||||||
response2 = await litellm.acompletion(
|
response2 = await litellm.acompletion(
|
||||||
model="bedrock/anthropic.claude-v1",
|
model="bedrock/anthropic.claude-v2",
|
||||||
messages=messages,
|
messages=messages,
|
||||||
max_tokens=40,
|
max_tokens=40,
|
||||||
temperature=1,
|
temperature=1,
|
||||||
|
@ -615,6 +616,7 @@ def test_redis_cache_acompletion_stream_bedrock():
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_s3_cache_acompletion_stream_azure():
|
def test_s3_cache_acompletion_stream_azure():
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
|
@ -697,6 +699,7 @@ def test_s3_cache_acompletion_stream_azure():
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
async def test_s3_cache_acompletion_azure():
|
async def test_s3_cache_acompletion_azure():
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
|
|
@ -1404,6 +1404,7 @@ def test_customprompt_together_ai():
|
||||||
# test_customprompt_together_ai()
|
# test_customprompt_together_ai()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_sagemaker():
|
def test_completion_sagemaker():
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
|
@ -1429,6 +1430,7 @@ def test_completion_sagemaker():
|
||||||
# test_completion_sagemaker()
|
# test_completion_sagemaker()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_sagemaker_stream():
|
def test_completion_sagemaker_stream():
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = False
|
litellm.set_verbose = False
|
||||||
|
@ -1459,6 +1461,7 @@ def test_completion_sagemaker_stream():
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_chat_sagemaker():
|
def test_completion_chat_sagemaker():
|
||||||
try:
|
try:
|
||||||
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||||
|
@ -1483,6 +1486,7 @@ def test_completion_chat_sagemaker():
|
||||||
# test_completion_chat_sagemaker()
|
# test_completion_chat_sagemaker()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_chat_sagemaker_mistral():
|
def test_completion_chat_sagemaker_mistral():
|
||||||
try:
|
try:
|
||||||
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||||
|
@ -1501,6 +1505,7 @@ def test_completion_chat_sagemaker_mistral():
|
||||||
# test_completion_chat_sagemaker_mistral()
|
# test_completion_chat_sagemaker_mistral()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_bedrock_titan_null_response():
|
def test_completion_bedrock_titan_null_response():
|
||||||
try:
|
try:
|
||||||
response = completion(
|
response = completion(
|
||||||
|
@ -1526,6 +1531,7 @@ def test_completion_bedrock_titan_null_response():
|
||||||
pytest.fail(f"An error occurred - {str(e)}")
|
pytest.fail(f"An error occurred - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_bedrock_titan():
|
def test_completion_bedrock_titan():
|
||||||
try:
|
try:
|
||||||
response = completion(
|
response = completion(
|
||||||
|
@ -1547,6 +1553,7 @@ def test_completion_bedrock_titan():
|
||||||
# test_completion_bedrock_titan()
|
# test_completion_bedrock_titan()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_bedrock_claude():
|
def test_completion_bedrock_claude():
|
||||||
print("calling claude")
|
print("calling claude")
|
||||||
try:
|
try:
|
||||||
|
@ -1568,6 +1575,7 @@ def test_completion_bedrock_claude():
|
||||||
# test_completion_bedrock_claude()
|
# test_completion_bedrock_claude()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_bedrock_cohere():
|
def test_completion_bedrock_cohere():
|
||||||
print("calling bedrock cohere")
|
print("calling bedrock cohere")
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
|
@ -1954,11 +1962,14 @@ def test_completion_gemini():
|
||||||
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||||
try:
|
try:
|
||||||
response = completion(model=model_name, messages=messages)
|
response = completion(model=model_name, messages=messages)
|
||||||
# Add any assertions here to check the response
|
# Add any assertions,here to check the response
|
||||||
print(response)
|
print(response)
|
||||||
except litellm.APIError as e:
|
except litellm.APIError as e:
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
if "InternalServerError" in str(e):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
@ -1974,7 +1985,12 @@ async def test_acompletion_gemini():
|
||||||
response = await litellm.acompletion(model=model_name, messages=messages)
|
response = await litellm.acompletion(model=model_name, messages=messages)
|
||||||
# Add any assertions here to check the response
|
# Add any assertions here to check the response
|
||||||
print(f"response: {response}")
|
print(f"response: {response}")
|
||||||
|
except litellm.APIError as e:
|
||||||
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
if "InternalServerError" in str(e):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -171,6 +171,7 @@ def test_cost_openai_image_gen():
|
||||||
assert cost == 0.019922944
|
assert cost == 0.019922944
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_cost_bedrock_pricing():
|
def test_cost_bedrock_pricing():
|
||||||
"""
|
"""
|
||||||
- get pricing specific to region for a model
|
- get pricing specific to region for a model
|
||||||
|
@ -226,6 +227,7 @@ def test_cost_bedrock_pricing():
|
||||||
assert cost == predicted_cost
|
assert cost == predicted_cost
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS disabled our access")
|
||||||
def test_cost_bedrock_pricing_actual_calls():
|
def test_cost_bedrock_pricing_actual_calls():
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
model = "anthropic.claude-instant-v1"
|
model = "anthropic.claude-instant-v1"
|
||||||
|
|
|
@ -80,16 +80,6 @@ model_list:
|
||||||
description: this is a test openai model
|
description: this is a test openai model
|
||||||
id: 9b1ef341-322c-410a-8992-903987fef439
|
id: 9b1ef341-322c-410a-8992-903987fef439
|
||||||
model_name: test_openai_models
|
model_name: test_openai_models
|
||||||
- litellm_params:
|
|
||||||
model: bedrock/amazon.titan-embed-text-v1
|
|
||||||
model_info:
|
|
||||||
mode: embedding
|
|
||||||
model_name: amazon-embeddings
|
|
||||||
- litellm_params:
|
|
||||||
model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
|
|
||||||
model_info:
|
|
||||||
mode: embedding
|
|
||||||
model_name: GPT-J 6B - Sagemaker Text Embedding (Internal)
|
|
||||||
- litellm_params:
|
- litellm_params:
|
||||||
model: dall-e-3
|
model: dall-e-3
|
||||||
model_info:
|
model_info:
|
||||||
|
|
|
@ -478,17 +478,18 @@ async def test_async_chat_azure_stream():
|
||||||
|
|
||||||
|
|
||||||
## Test Bedrock + sync
|
## Test Bedrock + sync
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_chat_bedrock_stream():
|
def test_chat_bedrock_stream():
|
||||||
try:
|
try:
|
||||||
customHandler = CompletionCustomHandler()
|
customHandler = CompletionCustomHandler()
|
||||||
litellm.callbacks = [customHandler]
|
litellm.callbacks = [customHandler]
|
||||||
response = litellm.completion(
|
response = litellm.completion(
|
||||||
model="bedrock/anthropic.claude-v1",
|
model="bedrock/anthropic.claude-v2",
|
||||||
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
|
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
|
||||||
)
|
)
|
||||||
# test streaming
|
# test streaming
|
||||||
response = litellm.completion(
|
response = litellm.completion(
|
||||||
model="bedrock/anthropic.claude-v1",
|
model="bedrock/anthropic.claude-v2",
|
||||||
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
|
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
@ -497,7 +498,7 @@ def test_chat_bedrock_stream():
|
||||||
# test failure callback
|
# test failure callback
|
||||||
try:
|
try:
|
||||||
response = litellm.completion(
|
response = litellm.completion(
|
||||||
model="bedrock/anthropic.claude-v1",
|
model="bedrock/anthropic.claude-v2",
|
||||||
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
|
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
|
||||||
aws_region_name="my-bad-region",
|
aws_region_name="my-bad-region",
|
||||||
stream=True,
|
stream=True,
|
||||||
|
@ -518,18 +519,19 @@ def test_chat_bedrock_stream():
|
||||||
|
|
||||||
|
|
||||||
## Test Bedrock + Async
|
## Test Bedrock + Async
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_async_chat_bedrock_stream():
|
async def test_async_chat_bedrock_stream():
|
||||||
try:
|
try:
|
||||||
customHandler = CompletionCustomHandler()
|
customHandler = CompletionCustomHandler()
|
||||||
litellm.callbacks = [customHandler]
|
litellm.callbacks = [customHandler]
|
||||||
response = await litellm.acompletion(
|
response = await litellm.acompletion(
|
||||||
model="bedrock/anthropic.claude-v1",
|
model="bedrock/anthropic.claude-v2",
|
||||||
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
|
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
|
||||||
)
|
)
|
||||||
# test streaming
|
# test streaming
|
||||||
response = await litellm.acompletion(
|
response = await litellm.acompletion(
|
||||||
model="bedrock/anthropic.claude-v1",
|
model="bedrock/anthropic.claude-v2",
|
||||||
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
|
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
@ -540,7 +542,7 @@ async def test_async_chat_bedrock_stream():
|
||||||
## test failure callback
|
## test failure callback
|
||||||
try:
|
try:
|
||||||
response = await litellm.acompletion(
|
response = await litellm.acompletion(
|
||||||
model="bedrock/anthropic.claude-v1",
|
model="bedrock/anthropic.claude-v2",
|
||||||
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
|
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
|
||||||
aws_region_name="my-bad-key",
|
aws_region_name="my-bad-key",
|
||||||
stream=True,
|
stream=True,
|
||||||
|
@ -561,6 +563,7 @@ async def test_async_chat_bedrock_stream():
|
||||||
|
|
||||||
|
|
||||||
## Test Sagemaker + Async
|
## Test Sagemaker + Async
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_async_chat_sagemaker_stream():
|
async def test_async_chat_sagemaker_stream():
|
||||||
try:
|
try:
|
||||||
|
@ -793,6 +796,7 @@ async def test_async_embedding_azure():
|
||||||
|
|
||||||
|
|
||||||
## Test Bedrock + Async
|
## Test Bedrock + Async
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_async_embedding_bedrock():
|
async def test_async_embedding_bedrock():
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -388,6 +388,7 @@ async def test_async_custom_handler_embedding_optional_param():
|
||||||
# asyncio.run(test_async_custom_handler_embedding_optional_param())
|
# asyncio.run(test_async_custom_handler_embedding_optional_param())
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Account suspended. Pending their approval")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_async_custom_handler_embedding_optional_param_bedrock():
|
async def test_async_custom_handler_embedding_optional_param_bedrock():
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -67,6 +67,7 @@ def verify_log_file(log_file_path):
|
||||||
assert success_count == 3 # Expect 3 success logs from dynamoDB
|
assert success_count == 3 # Expect 3 success logs from dynamoDB
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_dynamo_logging():
|
def test_dynamo_logging():
|
||||||
# all dynamodb requests need to be in one test function
|
# all dynamodb requests need to be in one test function
|
||||||
# since we are modifying stdout, and pytests runs tests in parallel
|
# since we are modifying stdout, and pytests runs tests in parallel
|
||||||
|
|
|
@ -256,6 +256,7 @@ async def test_vertexai_aembedding():
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_bedrock_embedding_titan():
|
def test_bedrock_embedding_titan():
|
||||||
try:
|
try:
|
||||||
# this tests if we support str input for bedrock embedding
|
# this tests if we support str input for bedrock embedding
|
||||||
|
@ -301,6 +302,7 @@ def test_bedrock_embedding_titan():
|
||||||
# test_bedrock_embedding_titan()
|
# test_bedrock_embedding_titan()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_bedrock_embedding_cohere():
|
def test_bedrock_embedding_cohere():
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = False
|
litellm.set_verbose = False
|
||||||
|
@ -422,6 +424,7 @@ def test_aembedding_azure():
|
||||||
# test_aembedding_azure()
|
# test_aembedding_azure()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_sagemaker_embeddings():
|
def test_sagemaker_embeddings():
|
||||||
try:
|
try:
|
||||||
response = litellm.embedding(
|
response = litellm.embedding(
|
||||||
|
@ -438,6 +441,7 @@ def test_sagemaker_embeddings():
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_sagemaker_aembeddings():
|
async def test_sagemaker_aembeddings():
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -42,6 +42,7 @@ exception_models = [
|
||||||
|
|
||||||
|
|
||||||
# Test 1: Context Window Errors
|
# Test 1: Context Window Errors
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.parametrize("model", exception_models)
|
@pytest.mark.parametrize("model", exception_models)
|
||||||
def test_context_window(model):
|
def test_context_window(model):
|
||||||
print("Testing context window error")
|
print("Testing context window error")
|
||||||
|
@ -120,9 +121,9 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
|
||||||
os.environ["AI21_API_KEY"] = "bad-key"
|
os.environ["AI21_API_KEY"] = "bad-key"
|
||||||
elif "togethercomputer" in model:
|
elif "togethercomputer" in model:
|
||||||
temporary_key = os.environ["TOGETHERAI_API_KEY"]
|
temporary_key = os.environ["TOGETHERAI_API_KEY"]
|
||||||
os.environ[
|
os.environ["TOGETHERAI_API_KEY"] = (
|
||||||
"TOGETHERAI_API_KEY"
|
"84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
|
||||||
] = "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
|
)
|
||||||
elif model in litellm.openrouter_models:
|
elif model in litellm.openrouter_models:
|
||||||
temporary_key = os.environ["OPENROUTER_API_KEY"]
|
temporary_key = os.environ["OPENROUTER_API_KEY"]
|
||||||
os.environ["OPENROUTER_API_KEY"] = "bad-key"
|
os.environ["OPENROUTER_API_KEY"] = "bad-key"
|
||||||
|
|
|
@ -87,6 +87,7 @@ async def test_azure_img_gen_health_check():
|
||||||
# asyncio.run(test_azure_img_gen_health_check())
|
# asyncio.run(test_azure_img_gen_health_check())
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_sagemaker_embedding_health_check():
|
async def test_sagemaker_embedding_health_check():
|
||||||
response = await litellm.ahealth_check(
|
response = await litellm.ahealth_check(
|
||||||
|
|
|
@ -121,6 +121,7 @@ async def test_async_image_generation_azure():
|
||||||
pytest.fail(f"An exception occurred - {str(e)}")
|
pytest.fail(f"An exception occurred - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_image_generation_bedrock():
|
def test_image_generation_bedrock():
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
|
@ -141,6 +142,7 @@ def test_image_generation_bedrock():
|
||||||
pytest.fail(f"An exception occurred - {str(e)}")
|
pytest.fail(f"An exception occurred - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_aimage_generation_bedrock_with_optional_params():
|
async def test_aimage_generation_bedrock_with_optional_params():
|
||||||
try:
|
try:
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -80,6 +80,14 @@ request_data = {
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def prisma_client():
|
def prisma_client():
|
||||||
|
from litellm.proxy.proxy_cli import append_query_params
|
||||||
|
|
||||||
|
### add connection pool + pool timeout args
|
||||||
|
params = {"connection_limit": 100, "pool_timeout": 60}
|
||||||
|
database_url = os.getenv("DATABASE_URL")
|
||||||
|
modified_url = append_query_params(database_url, params)
|
||||||
|
os.environ["DATABASE_URL"] = modified_url
|
||||||
|
|
||||||
# Assuming DBClient is a class that needs to be instantiated
|
# Assuming DBClient is a class that needs to be instantiated
|
||||||
prisma_client = PrismaClient(
|
prisma_client = PrismaClient(
|
||||||
database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
|
database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
|
||||||
|
@ -1633,3 +1641,99 @@ async def test_key_with_no_permissions(prisma_client):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Got Exception", e)
|
print("Got Exception", e)
|
||||||
print(e.message)
|
print(e.message)
|
||||||
|
|
||||||
|
|
||||||
|
async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
|
||||||
|
from litellm import ModelResponse, Choices, Message, Usage
|
||||||
|
from litellm.proxy.proxy_server import (
|
||||||
|
_PROXY_track_cost_callback as track_cost_callback,
|
||||||
|
)
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
|
||||||
|
resp = ModelResponse(
|
||||||
|
id=request_id,
|
||||||
|
choices=[
|
||||||
|
Choices(
|
||||||
|
finish_reason=None,
|
||||||
|
index=0,
|
||||||
|
message=Message(
|
||||||
|
content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
|
||||||
|
role="assistant",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
model="gpt-35-turbo", # azure always has model written like this
|
||||||
|
usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
|
||||||
|
)
|
||||||
|
await track_cost_callback(
|
||||||
|
kwargs={
|
||||||
|
"call_type": "acompletion",
|
||||||
|
"model": "sagemaker-chatgpt-v-2",
|
||||||
|
"stream": True,
|
||||||
|
"complete_streaming_response": resp,
|
||||||
|
"litellm_params": {
|
||||||
|
"metadata": {
|
||||||
|
"user_api_key": hash_token(generated_key),
|
||||||
|
"user_api_key_user_id": user_id,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"response_cost": 0.00005,
|
||||||
|
},
|
||||||
|
completion_response=resp,
|
||||||
|
start_time=datetime.now(),
|
||||||
|
end_time=datetime.now(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="High traffic load test for spend tracking")
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_proxy_load_test_db(prisma_client):
|
||||||
|
"""
|
||||||
|
Run 1500 req./s against track_cost_callback function
|
||||||
|
"""
|
||||||
|
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
|
||||||
|
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
|
||||||
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
import logging, time
|
||||||
|
|
||||||
|
litellm.set_verbose = True
|
||||||
|
verbose_proxy_logger.setLevel(logging.DEBUG)
|
||||||
|
try:
|
||||||
|
start_time = time.time()
|
||||||
|
await litellm.proxy.proxy_server.prisma_client.connect()
|
||||||
|
request = GenerateKeyRequest(max_budget=0.00001)
|
||||||
|
key = await generate_key_fn(request)
|
||||||
|
print(key)
|
||||||
|
|
||||||
|
generated_key = key.key
|
||||||
|
user_id = key.user_id
|
||||||
|
bearer_token = "Bearer " + generated_key
|
||||||
|
|
||||||
|
request = Request(scope={"type": "http"})
|
||||||
|
request._url = URL(url="/chat/completions")
|
||||||
|
|
||||||
|
# use generated key to auth in
|
||||||
|
result = await user_api_key_auth(request=request, api_key=bearer_token)
|
||||||
|
print("result from user auth with new key", result)
|
||||||
|
# update spend using track_cost callback, make 2nd request, it should fail
|
||||||
|
n = 5000
|
||||||
|
tasks = [
|
||||||
|
track_cost_callback_helper_fn(generated_key=generated_key, user_id=user_id)
|
||||||
|
for _ in range(n)
|
||||||
|
]
|
||||||
|
completions = await asyncio.gather(*tasks)
|
||||||
|
await asyncio.sleep(120)
|
||||||
|
try:
|
||||||
|
# call spend logs
|
||||||
|
spend_logs = await view_spend_logs(api_key=generated_key)
|
||||||
|
|
||||||
|
print(f"len responses: {len(spend_logs)}")
|
||||||
|
assert len(spend_logs) == n
|
||||||
|
print(n, time.time() - start_time, len(spend_logs))
|
||||||
|
except:
|
||||||
|
print(n, time.time() - start_time, 0)
|
||||||
|
raise Exception(f"it worked! key={key.key}")
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"An exception occurred - {str(e)}")
|
||||||
|
|
|
@ -12,6 +12,7 @@ import litellm
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_sagemaker():
|
def test_completion_sagemaker():
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
litellm.drop_params = True
|
litellm.drop_params = True
|
||||||
|
|
|
@ -473,6 +473,7 @@ def aleph_alpha_test_completion():
|
||||||
# Sagemaker
|
# Sagemaker
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def sagemaker_test_completion():
|
def sagemaker_test_completion():
|
||||||
litellm.SagemakerConfig(max_new_tokens=10)
|
litellm.SagemakerConfig(max_new_tokens=10)
|
||||||
# litellm.set_verbose=True
|
# litellm.set_verbose=True
|
||||||
|
@ -514,6 +515,7 @@ def sagemaker_test_completion():
|
||||||
# Bedrock
|
# Bedrock
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def bedrock_test_completion():
|
def bedrock_test_completion():
|
||||||
litellm.AmazonCohereConfig(max_tokens=10)
|
litellm.AmazonCohereConfig(max_tokens=10)
|
||||||
# litellm.set_verbose=True
|
# litellm.set_verbose=True
|
||||||
|
|
|
@ -125,6 +125,7 @@ def test_embedding(client_no_auth):
|
||||||
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
|
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_bedrock_embedding(client_no_auth):
|
def test_bedrock_embedding(client_no_auth):
|
||||||
global headers
|
global headers
|
||||||
from litellm.proxy.proxy_server import user_custom_auth
|
from litellm.proxy.proxy_server import user_custom_auth
|
||||||
|
@ -145,6 +146,7 @@ def test_bedrock_embedding(client_no_auth):
|
||||||
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
|
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_sagemaker_embedding(client_no_auth):
|
def test_sagemaker_embedding(client_no_auth):
|
||||||
global headers
|
global headers
|
||||||
from litellm.proxy.proxy_server import user_custom_auth
|
from litellm.proxy.proxy_server import user_custom_auth
|
||||||
|
|
|
@ -61,6 +61,7 @@ def generate_random_word(length=4):
|
||||||
return "".join(random.choice(letters) for _ in range(length))
|
return "".join(random.choice(letters) for _ in range(length))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_chat_completion(client_no_auth):
|
def test_chat_completion(client_no_auth):
|
||||||
global headers
|
global headers
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -166,14 +166,6 @@ def test_call_one_endpoint():
|
||||||
"tpm": 240000,
|
"tpm": 240000,
|
||||||
"rpm": 1800,
|
"rpm": 1800,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"model_name": "claude-v1",
|
|
||||||
"litellm_params": {
|
|
||||||
"model": "bedrock/anthropic.claude-instant-v1",
|
|
||||||
},
|
|
||||||
"tpm": 100000,
|
|
||||||
"rpm": 10000,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"model_name": "text-embedding-ada-002",
|
"model_name": "text-embedding-ada-002",
|
||||||
"litellm_params": {
|
"litellm_params": {
|
||||||
|
@ -202,15 +194,6 @@ def test_call_one_endpoint():
|
||||||
)
|
)
|
||||||
print("\n response", response)
|
print("\n response", response)
|
||||||
|
|
||||||
async def call_bedrock_claude():
|
|
||||||
response = await router.acompletion(
|
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
|
||||||
messages=[{"role": "user", "content": "hello this request will pass"}],
|
|
||||||
specific_deployment=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
print("\n response", response)
|
|
||||||
|
|
||||||
async def call_azure_embedding():
|
async def call_azure_embedding():
|
||||||
response = await router.aembedding(
|
response = await router.aembedding(
|
||||||
model="azure/azure-embedding-model",
|
model="azure/azure-embedding-model",
|
||||||
|
@ -221,7 +204,6 @@ def test_call_one_endpoint():
|
||||||
print("\n response", response)
|
print("\n response", response)
|
||||||
|
|
||||||
asyncio.run(call_azure_completion())
|
asyncio.run(call_azure_completion())
|
||||||
asyncio.run(call_bedrock_claude())
|
|
||||||
asyncio.run(call_azure_embedding())
|
asyncio.run(call_azure_embedding())
|
||||||
|
|
||||||
os.environ["AZURE_API_BASE"] = old_api_base
|
os.environ["AZURE_API_BASE"] = old_api_base
|
||||||
|
@ -593,6 +575,7 @@ def test_azure_embedding_on_router():
|
||||||
# test_azure_embedding_on_router()
|
# test_azure_embedding_on_router()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_bedrock_on_router():
|
def test_bedrock_on_router():
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
print("\n Testing bedrock on router\n")
|
print("\n Testing bedrock on router\n")
|
||||||
|
|
|
@ -87,6 +87,7 @@ def test_router_timeouts():
|
||||||
print("********** TOKENS USED SO FAR = ", total_tokens_used)
|
print("********** TOKENS USED SO FAR = ", total_tokens_used)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_router_timeouts_bedrock():
|
async def test_router_timeouts_bedrock():
|
||||||
import openai
|
import openai
|
||||||
|
|
|
@ -764,6 +764,7 @@ def test_completion_replicate_stream_bad_key():
|
||||||
# test_completion_replicate_stream_bad_key()
|
# test_completion_replicate_stream_bad_key()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_bedrock_claude_stream():
|
def test_completion_bedrock_claude_stream():
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = False
|
litellm.set_verbose = False
|
||||||
|
@ -810,6 +811,7 @@ def test_completion_bedrock_claude_stream():
|
||||||
# test_completion_bedrock_claude_stream()
|
# test_completion_bedrock_claude_stream()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_bedrock_ai21_stream():
|
def test_completion_bedrock_ai21_stream():
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = False
|
litellm.set_verbose = False
|
||||||
|
@ -911,6 +913,7 @@ def test_sagemaker_weird_response():
|
||||||
# test_sagemaker_weird_response()
|
# test_sagemaker_weird_response()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_sagemaker_streaming_async():
|
async def test_sagemaker_streaming_async():
|
||||||
try:
|
try:
|
||||||
|
@ -949,6 +952,7 @@ async def test_sagemaker_streaming_async():
|
||||||
# asyncio.run(test_sagemaker_streaming_async())
|
# asyncio.run(test_sagemaker_streaming_async())
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
def test_completion_sagemaker_stream():
|
def test_completion_sagemaker_stream():
|
||||||
try:
|
try:
|
||||||
response = completion(
|
response = completion(
|
||||||
|
@ -1075,8 +1079,6 @@ async def test_hf_completion_tgi_stream():
|
||||||
if finished:
|
if finished:
|
||||||
break
|
break
|
||||||
idx += 1
|
idx += 1
|
||||||
if complete_response.strip() == "":
|
|
||||||
raise Exception("Empty response received")
|
|
||||||
print(f"completion_response: {complete_response}")
|
print(f"completion_response: {complete_response}")
|
||||||
except litellm.ServiceUnavailableError as e:
|
except litellm.ServiceUnavailableError as e:
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -317,3 +317,24 @@ def test_token_counter():
|
||||||
|
|
||||||
|
|
||||||
# test_token_counter()
|
# test_token_counter()
|
||||||
|
|
||||||
|
|
||||||
|
def test_supports_function_calling():
|
||||||
|
try:
|
||||||
|
assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
|
||||||
|
assert (
|
||||||
|
litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
litellm.supports_function_calling(model="anthropic.claude-instant-v1")
|
||||||
|
== False
|
||||||
|
)
|
||||||
|
assert litellm.supports_function_calling(model="palm/chat-bison") == False
|
||||||
|
assert litellm.supports_function_calling(model="ollama/llama2") == False
|
||||||
|
assert (
|
||||||
|
litellm.supports_function_calling(model="anthropic.claude-instant-v1")
|
||||||
|
== False
|
||||||
|
)
|
||||||
|
assert litellm.supports_function_calling(model="claude-2") == False
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
122
litellm/utils.py
122
litellm/utils.py
|
@ -205,18 +205,18 @@ def map_finish_reason(
|
||||||
|
|
||||||
class FunctionCall(OpenAIObject):
|
class FunctionCall(OpenAIObject):
|
||||||
arguments: str
|
arguments: str
|
||||||
name: str
|
name: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class Function(OpenAIObject):
|
class Function(OpenAIObject):
|
||||||
arguments: str
|
arguments: str
|
||||||
name: str
|
name: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class ChatCompletionDeltaToolCall(OpenAIObject):
|
class ChatCompletionDeltaToolCall(OpenAIObject):
|
||||||
id: str
|
id: Optional[str] = None
|
||||||
function: Function
|
function: Function
|
||||||
type: str
|
type: Optional[str] = None
|
||||||
index: int
|
index: int
|
||||||
|
|
||||||
|
|
||||||
|
@ -275,13 +275,19 @@ class Delta(OpenAIObject):
|
||||||
super(Delta, self).__init__(**params)
|
super(Delta, self).__init__(**params)
|
||||||
self.content = content
|
self.content = content
|
||||||
self.role = role
|
self.role = role
|
||||||
|
if function_call is not None and isinstance(function_call, dict):
|
||||||
|
self.function_call = FunctionCall(**function_call)
|
||||||
|
else:
|
||||||
self.function_call = function_call
|
self.function_call = function_call
|
||||||
if tool_calls is not None and isinstance(tool_calls, dict):
|
if tool_calls is not None and isinstance(tool_calls, list):
|
||||||
self.tool_calls = []
|
self.tool_calls = []
|
||||||
for tool_call in tool_calls:
|
for tool_call in tool_calls:
|
||||||
|
if isinstance(tool_call, dict):
|
||||||
if tool_call.get("index", None) is None:
|
if tool_call.get("index", None) is None:
|
||||||
tool_call["index"] = 0
|
tool_call["index"] = 0
|
||||||
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
|
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
|
||||||
|
elif isinstance(tool_call, ChatCompletionDeltaToolCall):
|
||||||
|
self.tool_calls.append(tool_call)
|
||||||
else:
|
else:
|
||||||
self.tool_calls = tool_calls
|
self.tool_calls = tool_calls
|
||||||
|
|
||||||
|
@ -1634,7 +1640,7 @@ class Logging:
|
||||||
verbose_logger.debug(
|
verbose_logger.debug(
|
||||||
"Async success callbacks: Got a complete streaming response"
|
"Async success callbacks: Got a complete streaming response"
|
||||||
)
|
)
|
||||||
self.model_call_details["complete_streaming_response"] = (
|
self.model_call_details["async_complete_streaming_response"] = (
|
||||||
complete_streaming_response
|
complete_streaming_response
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
|
@ -1682,28 +1688,31 @@ class Logging:
|
||||||
print_verbose("async success_callback: reaches cache for logging!")
|
print_verbose("async success_callback: reaches cache for logging!")
|
||||||
kwargs = self.model_call_details
|
kwargs = self.model_call_details
|
||||||
if self.stream:
|
if self.stream:
|
||||||
if "complete_streaming_response" not in kwargs:
|
if "async_complete_streaming_response" not in kwargs:
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
|
f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. Kwargs={kwargs}\n\n"
|
||||||
)
|
)
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
print_verbose(
|
print_verbose(
|
||||||
"async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
|
"async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache"
|
||||||
)
|
)
|
||||||
result = kwargs["complete_streaming_response"]
|
result = kwargs["async_complete_streaming_response"]
|
||||||
# only add to cache once we have a complete streaming response
|
# only add to cache once we have a complete streaming response
|
||||||
litellm.cache.add_cache(result, **kwargs)
|
litellm.cache.add_cache(result, **kwargs)
|
||||||
if isinstance(callback, CustomLogger): # custom logger class
|
if isinstance(callback, CustomLogger): # custom logger class
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}"
|
f"Running Async success callback: {callback}; self.stream: {self.stream}; async_complete_streaming_response: {self.model_call_details.get('async_complete_streaming_response', None)} result={result}"
|
||||||
)
|
)
|
||||||
if self.stream == True:
|
if self.stream == True:
|
||||||
if "complete_streaming_response" in self.model_call_details:
|
if (
|
||||||
|
"async_complete_streaming_response"
|
||||||
|
in self.model_call_details
|
||||||
|
):
|
||||||
await callback.async_log_success_event(
|
await callback.async_log_success_event(
|
||||||
kwargs=self.model_call_details,
|
kwargs=self.model_call_details,
|
||||||
response_obj=self.model_call_details[
|
response_obj=self.model_call_details[
|
||||||
"complete_streaming_response"
|
"async_complete_streaming_response"
|
||||||
],
|
],
|
||||||
start_time=start_time,
|
start_time=start_time,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
|
@ -1724,14 +1733,18 @@ class Logging:
|
||||||
)
|
)
|
||||||
if callable(callback): # custom logger functions
|
if callable(callback): # custom logger functions
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"Making async function logging call - {self.model_call_details}"
|
f"Making async function logging call for {callback}, result={result} - {self.model_call_details}"
|
||||||
)
|
)
|
||||||
if self.stream:
|
if self.stream:
|
||||||
if "complete_streaming_response" in self.model_call_details:
|
if (
|
||||||
|
"async_complete_streaming_response"
|
||||||
|
in self.model_call_details
|
||||||
|
):
|
||||||
|
|
||||||
await customLogger.async_log_event(
|
await customLogger.async_log_event(
|
||||||
kwargs=self.model_call_details,
|
kwargs=self.model_call_details,
|
||||||
response_obj=self.model_call_details[
|
response_obj=self.model_call_details[
|
||||||
"complete_streaming_response"
|
"async_complete_streaming_response"
|
||||||
],
|
],
|
||||||
start_time=start_time,
|
start_time=start_time,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
|
@ -1752,14 +1765,17 @@ class Logging:
|
||||||
if dynamoLogger is None:
|
if dynamoLogger is None:
|
||||||
dynamoLogger = DyanmoDBLogger()
|
dynamoLogger = DyanmoDBLogger()
|
||||||
if self.stream:
|
if self.stream:
|
||||||
if "complete_streaming_response" in self.model_call_details:
|
if (
|
||||||
|
"async_complete_streaming_response"
|
||||||
|
in self.model_call_details
|
||||||
|
):
|
||||||
print_verbose(
|
print_verbose(
|
||||||
"DynamoDB Logger: Got Stream Event - Completed Stream Response"
|
"DynamoDB Logger: Got Stream Event - Completed Stream Response"
|
||||||
)
|
)
|
||||||
await dynamoLogger._async_log_event(
|
await dynamoLogger._async_log_event(
|
||||||
kwargs=self.model_call_details,
|
kwargs=self.model_call_details,
|
||||||
response_obj=self.model_call_details[
|
response_obj=self.model_call_details[
|
||||||
"complete_streaming_response"
|
"async_complete_streaming_response"
|
||||||
],
|
],
|
||||||
start_time=start_time,
|
start_time=start_time,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
|
@ -3713,6 +3729,54 @@ def completion_cost(
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
|
||||||
|
def supports_function_calling(model: str):
|
||||||
|
"""
|
||||||
|
Check if the given model supports function calling and return a boolean value.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
model (str): The model name to be checked.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the model supports function calling, False otherwise.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: If the given model is not found in model_prices_and_context_window.json.
|
||||||
|
"""
|
||||||
|
if model in litellm.model_cost:
|
||||||
|
model_info = litellm.model_cost[model]
|
||||||
|
if model_info.get("supports_function_calling", False):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
raise Exception(
|
||||||
|
f"Model not in model_prices_and_context_window.json. You passed model={model}."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def supports_parallel_function_calling(model: str):
|
||||||
|
"""
|
||||||
|
Check if the given model supports parallel function calling and return True if it does, False otherwise.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
model (str): The model to check for support of parallel function calling.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the model supports parallel function calling, False otherwise.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: If the model is not found in the model_cost dictionary.
|
||||||
|
"""
|
||||||
|
if model in litellm.model_cost:
|
||||||
|
model_info = litellm.model_cost[model]
|
||||||
|
if model_info.get("supports_parallel_function_calling", False):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
raise Exception(
|
||||||
|
f"Model not in model_prices_and_context_window.json. You passed model={model}."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
####### HELPER FUNCTIONS ################
|
####### HELPER FUNCTIONS ################
|
||||||
def register_model(model_cost: Union[str, dict]):
|
def register_model(model_cost: Union[str, dict]):
|
||||||
"""
|
"""
|
||||||
|
@ -4041,6 +4105,7 @@ def get_optional_params(
|
||||||
and custom_llm_provider != "vertex_ai"
|
and custom_llm_provider != "vertex_ai"
|
||||||
and custom_llm_provider != "anyscale"
|
and custom_llm_provider != "anyscale"
|
||||||
and custom_llm_provider != "together_ai"
|
and custom_llm_provider != "together_ai"
|
||||||
|
and custom_llm_provider != "mistral"
|
||||||
):
|
):
|
||||||
if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
|
if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
|
||||||
# ollama actually supports json output
|
# ollama actually supports json output
|
||||||
|
@ -4711,7 +4776,14 @@ def get_optional_params(
|
||||||
if max_tokens:
|
if max_tokens:
|
||||||
optional_params["max_tokens"] = max_tokens
|
optional_params["max_tokens"] = max_tokens
|
||||||
elif custom_llm_provider == "mistral":
|
elif custom_llm_provider == "mistral":
|
||||||
supported_params = ["temperature", "top_p", "stream", "max_tokens"]
|
supported_params = [
|
||||||
|
"temperature",
|
||||||
|
"top_p",
|
||||||
|
"stream",
|
||||||
|
"max_tokens",
|
||||||
|
"tools",
|
||||||
|
"tool_choice",
|
||||||
|
]
|
||||||
_check_valid_arg(supported_params=supported_params)
|
_check_valid_arg(supported_params=supported_params)
|
||||||
if temperature is not None:
|
if temperature is not None:
|
||||||
optional_params["temperature"] = temperature
|
optional_params["temperature"] = temperature
|
||||||
|
@ -4721,6 +4793,10 @@ def get_optional_params(
|
||||||
optional_params["stream"] = stream
|
optional_params["stream"] = stream
|
||||||
if max_tokens is not None:
|
if max_tokens is not None:
|
||||||
optional_params["max_tokens"] = max_tokens
|
optional_params["max_tokens"] = max_tokens
|
||||||
|
if tools is not None:
|
||||||
|
optional_params["tools"] = tools
|
||||||
|
if tool_choice is not None:
|
||||||
|
optional_params["tool_choice"] = tool_choice
|
||||||
|
|
||||||
# check safe_mode, random_seed: https://docs.mistral.ai/api/#operation/createChatCompletion
|
# check safe_mode, random_seed: https://docs.mistral.ai/api/#operation/createChatCompletion
|
||||||
safe_mode = passed_params.pop("safe_mode", None)
|
safe_mode = passed_params.pop("safe_mode", None)
|
||||||
|
@ -6945,7 +7021,7 @@ def exception_type(
|
||||||
if "500 An internal error has occurred." in error_str:
|
if "500 An internal error has occurred." in error_str:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise APIError(
|
raise APIError(
|
||||||
status_code=original_exception.status_code,
|
status_code=getattr(original_exception, "status_code", 500),
|
||||||
message=f"PalmException - {original_exception.message}",
|
message=f"PalmException - {original_exception.message}",
|
||||||
llm_provider="palm",
|
llm_provider="palm",
|
||||||
model=model,
|
model=model,
|
||||||
|
@ -8728,7 +8804,7 @@ class CustomStreamWrapper:
|
||||||
or original_chunk.choices[0].delta.tool_calls is not None
|
or original_chunk.choices[0].delta.tool_calls is not None
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
delta = dict(original_chunk.choices[0].delta)
|
delta = original_chunk.choices[0].delta
|
||||||
model_response.system_fingerprint = (
|
model_response.system_fingerprint = (
|
||||||
original_chunk.system_fingerprint
|
original_chunk.system_fingerprint
|
||||||
)
|
)
|
||||||
|
@ -8763,7 +8839,9 @@ class CustomStreamWrapper:
|
||||||
is None
|
is None
|
||||||
):
|
):
|
||||||
t.function.arguments = ""
|
t.function.arguments = ""
|
||||||
model_response.choices[0].delta = Delta(**delta)
|
_json_delta = delta.model_dump()
|
||||||
|
print_verbose(f"_json_delta: {_json_delta}")
|
||||||
|
model_response.choices[0].delta = Delta(**_json_delta)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
model_response.choices[0].delta = Delta()
|
model_response.choices[0].delta = Delta()
|
||||||
|
|
|
@ -6,7 +6,8 @@
|
||||||
"input_cost_per_token": 0.00003,
|
"input_cost_per_token": 0.00003,
|
||||||
"output_cost_per_token": 0.00006,
|
"output_cost_per_token": 0.00006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-4-turbo-preview": {
|
"gpt-4-turbo-preview": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
|
@ -15,7 +16,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-4-0314": {
|
"gpt-4-0314": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
|
@ -33,7 +36,8 @@
|
||||||
"input_cost_per_token": 0.00003,
|
"input_cost_per_token": 0.00003,
|
||||||
"output_cost_per_token": 0.00006,
|
"output_cost_per_token": 0.00006,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-4-32k": {
|
"gpt-4-32k": {
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
|
@ -69,7 +73,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-4-0125-preview": {
|
"gpt-4-0125-preview": {
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
|
@ -78,7 +84,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-4-vision-preview": {
|
"gpt-4-vision-preview": {
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
|
@ -105,7 +113,8 @@
|
||||||
"input_cost_per_token": 0.0000015,
|
"input_cost_per_token": 0.0000015,
|
||||||
"output_cost_per_token": 0.000002,
|
"output_cost_per_token": 0.000002,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-0301": {
|
"gpt-3.5-turbo-0301": {
|
||||||
"max_tokens": 4097,
|
"max_tokens": 4097,
|
||||||
|
@ -123,7 +132,8 @@
|
||||||
"input_cost_per_token": 0.0000015,
|
"input_cost_per_token": 0.0000015,
|
||||||
"output_cost_per_token": 0.000002,
|
"output_cost_per_token": 0.000002,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-1106": {
|
"gpt-3.5-turbo-1106": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -132,7 +142,9 @@
|
||||||
"input_cost_per_token": 0.0000010,
|
"input_cost_per_token": 0.0000010,
|
||||||
"output_cost_per_token": 0.0000020,
|
"output_cost_per_token": 0.0000020,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-0125": {
|
"gpt-3.5-turbo-0125": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -141,7 +153,9 @@
|
||||||
"input_cost_per_token": 0.0000005,
|
"input_cost_per_token": 0.0000005,
|
||||||
"output_cost_per_token": 0.0000015,
|
"output_cost_per_token": 0.0000015,
|
||||||
"litellm_provider": "openai",
|
"litellm_provider": "openai",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"gpt-3.5-turbo-16k": {
|
"gpt-3.5-turbo-16k": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -286,7 +300,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-4-1106-preview": {
|
"azure/gpt-4-1106-preview": {
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
|
@ -295,7 +311,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-4-0613": {
|
"azure/gpt-4-0613": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
|
@ -304,7 +322,8 @@
|
||||||
"input_cost_per_token": 0.00003,
|
"input_cost_per_token": 0.00003,
|
||||||
"output_cost_per_token": 0.00006,
|
"output_cost_per_token": 0.00006,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-4-32k-0613": {
|
"azure/gpt-4-32k-0613": {
|
||||||
"max_tokens": 32768,
|
"max_tokens": 32768,
|
||||||
|
@ -331,7 +350,8 @@
|
||||||
"input_cost_per_token": 0.00003,
|
"input_cost_per_token": 0.00003,
|
||||||
"output_cost_per_token": 0.00006,
|
"output_cost_per_token": 0.00006,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-4-turbo": {
|
"azure/gpt-4-turbo": {
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
|
@ -340,7 +360,9 @@
|
||||||
"input_cost_per_token": 0.00001,
|
"input_cost_per_token": 0.00001,
|
||||||
"output_cost_per_token": 0.00003,
|
"output_cost_per_token": 0.00003,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-4-turbo-vision-preview": {
|
"azure/gpt-4-turbo-vision-preview": {
|
||||||
"max_tokens": 128000,
|
"max_tokens": 128000,
|
||||||
|
@ -358,7 +380,8 @@
|
||||||
"input_cost_per_token": 0.000003,
|
"input_cost_per_token": 0.000003,
|
||||||
"output_cost_per_token": 0.000004,
|
"output_cost_per_token": 0.000004,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-35-turbo-1106": {
|
"azure/gpt-35-turbo-1106": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
@ -367,7 +390,20 @@
|
||||||
"input_cost_per_token": 0.0000015,
|
"input_cost_per_token": 0.0000015,
|
||||||
"output_cost_per_token": 0.000002,
|
"output_cost_per_token": 0.000002,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
|
},
|
||||||
|
"azure/gpt-35-turbo-0125": {
|
||||||
|
"max_tokens": 16384,
|
||||||
|
"max_input_tokens": 16384,
|
||||||
|
"max_output_tokens": 4096,
|
||||||
|
"input_cost_per_token": 0.0000005,
|
||||||
|
"output_cost_per_token": 0.0000015,
|
||||||
|
"litellm_provider": "azure",
|
||||||
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/gpt-35-turbo-16k": {
|
"azure/gpt-35-turbo-16k": {
|
||||||
"max_tokens": 16385,
|
"max_tokens": 16385,
|
||||||
|
@ -385,7 +421,8 @@
|
||||||
"input_cost_per_token": 0.0000015,
|
"input_cost_per_token": 0.0000015,
|
||||||
"output_cost_per_token": 0.000002,
|
"output_cost_per_token": 0.000002,
|
||||||
"litellm_provider": "azure",
|
"litellm_provider": "azure",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"azure/ada": {
|
"azure/ada": {
|
||||||
"max_tokens": 8191,
|
"max_tokens": 8191,
|
||||||
|
@ -514,11 +551,12 @@
|
||||||
"mode": "chat"
|
"mode": "chat"
|
||||||
},
|
},
|
||||||
"mistral/mistral-large-latest": {
|
"mistral/mistral-large-latest": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 32000,
|
||||||
"input_cost_per_token": 0.000008,
|
"input_cost_per_token": 0.000008,
|
||||||
"output_cost_per_token": 0.000024,
|
"output_cost_per_token": 0.000024,
|
||||||
"litellm_provider": "mistral",
|
"litellm_provider": "mistral",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"mistral/mistral-embed": {
|
"mistral/mistral-embed": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
|
@ -676,7 +714,8 @@
|
||||||
"input_cost_per_token": 0.00000025,
|
"input_cost_per_token": 0.00000025,
|
||||||
"output_cost_per_token": 0.0000005,
|
"output_cost_per_token": 0.0000005,
|
||||||
"litellm_provider": "vertex_ai-language-models",
|
"litellm_provider": "vertex_ai-language-models",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"gemini-1.5-pro": {
|
"gemini-1.5-pro": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
|
@ -687,6 +726,15 @@
|
||||||
"litellm_provider": "vertex_ai-language-models",
|
"litellm_provider": "vertex_ai-language-models",
|
||||||
"mode": "chat"
|
"mode": "chat"
|
||||||
},
|
},
|
||||||
|
"gemini-1.5-pro-preview-0215": {
|
||||||
|
"max_tokens": 8192,
|
||||||
|
"max_input_tokens": 1000000,
|
||||||
|
"max_output_tokens": 8192,
|
||||||
|
"input_cost_per_token": 0,
|
||||||
|
"output_cost_per_token": 0,
|
||||||
|
"litellm_provider": "vertex_ai-language-models",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
"gemini-pro-vision": {
|
"gemini-pro-vision": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
"max_output_tokens": 2048,
|
"max_output_tokens": 2048,
|
||||||
|
@ -1729,6 +1777,23 @@
|
||||||
"output_cost_per_token": 0.0000009,
|
"output_cost_per_token": 0.0000009,
|
||||||
"litellm_provider": "together_ai"
|
"litellm_provider": "together_ai"
|
||||||
},
|
},
|
||||||
|
"together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
|
||||||
|
"input_cost_per_token": 0.0000006,
|
||||||
|
"output_cost_per_token": 0.0000006,
|
||||||
|
"litellm_provider": "together_ai",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
|
},
|
||||||
|
"together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
|
||||||
|
"litellm_provider": "together_ai",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
|
},
|
||||||
|
"together_ai/togethercomputer/CodeLlama-34b-Instruct": {
|
||||||
|
"litellm_provider": "together_ai",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_parallel_function_calling": true
|
||||||
|
},
|
||||||
"ollama/llama2": {
|
"ollama/llama2": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
"input_cost_per_token": 0.0,
|
"input_cost_per_token": 0.0,
|
||||||
|
@ -1981,7 +2046,16 @@
|
||||||
"input_cost_per_token": 0.00000015,
|
"input_cost_per_token": 0.00000015,
|
||||||
"output_cost_per_token": 0.00000015,
|
"output_cost_per_token": 0.00000015,
|
||||||
"litellm_provider": "anyscale",
|
"litellm_provider": "anyscale",
|
||||||
"mode": "chat"
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
|
},
|
||||||
|
"anyscale/Mixtral-8x7B-Instruct-v0.1": {
|
||||||
|
"max_tokens": 16384,
|
||||||
|
"input_cost_per_token": 0.00000015,
|
||||||
|
"output_cost_per_token": 0.00000015,
|
||||||
|
"litellm_provider": "anyscale",
|
||||||
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
"anyscale/HuggingFaceH4/zephyr-7b-beta": {
|
"anyscale/HuggingFaceH4/zephyr-7b-beta": {
|
||||||
"max_tokens": 16384,
|
"max_tokens": 16384,
|
||||||
|
|
|
@ -40,6 +40,8 @@ litellm_settings:
|
||||||
budget_duration: 30d
|
budget_duration: 30d
|
||||||
general_settings:
|
general_settings:
|
||||||
master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)
|
master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)
|
||||||
|
proxy_budget_rescheduler_min_time: 30
|
||||||
|
proxy_budget_rescheduler_max_time: 60
|
||||||
# database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy
|
# database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy
|
||||||
|
|
||||||
environment_variables:
|
environment_variables:
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "litellm"
|
name = "litellm"
|
||||||
version = "1.27.12"
|
version = "1.27.15"
|
||||||
description = "Library to easily interface with LLM API providers"
|
description = "Library to easily interface with LLM API providers"
|
||||||
authors = ["BerriAI"]
|
authors = ["BerriAI"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
|
||||||
build-backend = "poetry.core.masonry.api"
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
[tool.commitizen]
|
[tool.commitizen]
|
||||||
version = "1.27.12"
|
version = "1.27.15"
|
||||||
version_files = [
|
version_files = [
|
||||||
"pyproject.toml:^version"
|
"pyproject.toml:^version"
|
||||||
]
|
]
|
||||||
|
|
|
@ -10,6 +10,7 @@ gunicorn==21.2.0 # server dep
|
||||||
boto3==1.34.34 # aws bedrock/sagemaker calls
|
boto3==1.34.34 # aws bedrock/sagemaker calls
|
||||||
redis==5.0.0 # caching
|
redis==5.0.0 # caching
|
||||||
numpy==1.24.3 # semantic caching
|
numpy==1.24.3 # semantic caching
|
||||||
|
pandas==2.1.1 # for viewing clickhouse spend analytics
|
||||||
prisma==0.11.0 # for db
|
prisma==0.11.0 # for db
|
||||||
mangum==0.17.0 # for aws lambda functions
|
mangum==0.17.0 # for aws lambda functions
|
||||||
google-generativeai==0.3.2 # for vertex ai calls
|
google-generativeai==0.3.2 # for vertex ai calls
|
||||||
|
|
|
@ -449,7 +449,7 @@ async def test_key_with_budgets():
|
||||||
reset_at_init_value = key_info["info"]["budget_reset_at"]
|
reset_at_init_value = key_info["info"]["budget_reset_at"]
|
||||||
reset_at_new_value = None
|
reset_at_new_value = None
|
||||||
i = 0
|
i = 0
|
||||||
await asyncio.sleep(610)
|
await asyncio.sleep(120)
|
||||||
while i < 3:
|
while i < 3:
|
||||||
key_info = await get_key_info(session=session, get_key=key, call_key=key)
|
key_info = await get_key_info(session=session, get_key=key, call_key=key)
|
||||||
reset_at_new_value = key_info["info"]["budget_reset_at"]
|
reset_at_new_value = key_info["info"]["budget_reset_at"]
|
||||||
|
@ -490,6 +490,7 @@ async def test_key_crossing_budget():
|
||||||
assert "ExceededTokenBudget: Current spend for token:" in str(e)
|
assert "ExceededTokenBudget: Current spend for token:" in str(e)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_key_info_spend_values_sagemaker():
|
async def test_key_info_spend_values_sagemaker():
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -313,6 +313,7 @@ export const userSpendLogsCall = async (
|
||||||
endTime: String
|
endTime: String
|
||||||
) => {
|
) => {
|
||||||
try {
|
try {
|
||||||
|
console.log(`user role in spend logs call: ${userRole}`);
|
||||||
let url = proxyBaseUrl ? `${proxyBaseUrl}/spend/logs` : `/spend/logs`;
|
let url = proxyBaseUrl ? `${proxyBaseUrl}/spend/logs` : `/spend/logs`;
|
||||||
if (userRole == "App Owner") {
|
if (userRole == "App Owner") {
|
||||||
url = `${url}/?user_id=${userID}&start_date=${startTime}&end_date=${endTime}`;
|
url = `${url}/?user_id=${userID}&start_date=${startTime}&end_date=${endTime}`;
|
||||||
|
@ -343,6 +344,96 @@ export const userSpendLogsCall = async (
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export const adminSpendLogsCall = async (accessToken: String) => {
|
||||||
|
try {
|
||||||
|
let url = proxyBaseUrl
|
||||||
|
? `${proxyBaseUrl}/global/spend/logs`
|
||||||
|
: `/global/spend/logs`;
|
||||||
|
|
||||||
|
message.info("Making spend logs request");
|
||||||
|
const response = await fetch(url, {
|
||||||
|
method: "GET",
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${accessToken}`,
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorData = await response.text();
|
||||||
|
message.error(errorData);
|
||||||
|
throw new Error("Network response was not ok");
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
console.log(data);
|
||||||
|
message.success("Spend Logs received");
|
||||||
|
return data;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Failed to create key:", error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
export const adminTopKeysCall = async (accessToken: String) => {
|
||||||
|
try {
|
||||||
|
let url = proxyBaseUrl
|
||||||
|
? `${proxyBaseUrl}/global/spend/keys?limit=5`
|
||||||
|
: `/global/spend/keys?limit=5`;
|
||||||
|
|
||||||
|
message.info("Making spend keys request");
|
||||||
|
const response = await fetch(url, {
|
||||||
|
method: "GET",
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${accessToken}`,
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorData = await response.text();
|
||||||
|
message.error(errorData);
|
||||||
|
throw new Error("Network response was not ok");
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
console.log(data);
|
||||||
|
message.success("Spend Logs received");
|
||||||
|
return data;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Failed to create key:", error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
export const adminTopModelsCall = async (accessToken: String) => {
|
||||||
|
try {
|
||||||
|
let url = proxyBaseUrl
|
||||||
|
? `${proxyBaseUrl}/global/spend/models?limit=5`
|
||||||
|
: `/global/spend/models?limit=5`;
|
||||||
|
|
||||||
|
message.info("Making spend models request");
|
||||||
|
const response = await fetch(url, {
|
||||||
|
method: "GET",
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${accessToken}`,
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorData = await response.text();
|
||||||
|
message.error(errorData);
|
||||||
|
throw new Error("Network response was not ok");
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
console.log(data);
|
||||||
|
message.success("Spend Logs received");
|
||||||
|
return data;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Failed to create key:", error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
export const keyInfoCall = async (accessToken: String, keys: String[]) => {
|
export const keyInfoCall = async (accessToken: String, keys: String[]) => {
|
||||||
try {
|
try {
|
||||||
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
|
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
|
||||||
|
|
|
@ -2,7 +2,13 @@ import { BarChart, Card, Title } from "@tremor/react";
|
||||||
|
|
||||||
import React, { useState, useEffect } from "react";
|
import React, { useState, useEffect } from "react";
|
||||||
import { Grid, Col, Text, LineChart } from "@tremor/react";
|
import { Grid, Col, Text, LineChart } from "@tremor/react";
|
||||||
import { userSpendLogsCall, keyInfoCall } from "./networking";
|
import {
|
||||||
|
userSpendLogsCall,
|
||||||
|
keyInfoCall,
|
||||||
|
adminSpendLogsCall,
|
||||||
|
adminTopKeysCall,
|
||||||
|
adminTopModelsCall,
|
||||||
|
} from "./networking";
|
||||||
import { start } from "repl";
|
import { start } from "repl";
|
||||||
|
|
||||||
interface UsagePageProps {
|
interface UsagePageProps {
|
||||||
|
@ -164,6 +170,25 @@ const UsagePage: React.FC<UsagePageProps> = ({
|
||||||
if (accessToken && token && userRole && userID) {
|
if (accessToken && token && userRole && userID) {
|
||||||
const fetchData = async () => {
|
const fetchData = async () => {
|
||||||
try {
|
try {
|
||||||
|
/**
|
||||||
|
* If user is Admin - query the global views endpoints
|
||||||
|
* If user is App Owner - use the normal spend logs call
|
||||||
|
*/
|
||||||
|
console.log(`user role: ${userRole}`);
|
||||||
|
if (userRole == "Admin") {
|
||||||
|
const overall_spend = await adminSpendLogsCall(accessToken);
|
||||||
|
setKeySpendData(overall_spend);
|
||||||
|
const top_keys = await adminTopKeysCall(accessToken);
|
||||||
|
const filtered_keys = top_keys.map((k: any) => ({
|
||||||
|
key: (k["key_name"] || k["key_alias"] || k["api_key"]).substring(
|
||||||
|
0,
|
||||||
|
7
|
||||||
|
),
|
||||||
|
spend: k["total_spend"],
|
||||||
|
}));
|
||||||
|
setTopKeys(filtered_keys);
|
||||||
|
const top_models = await adminTopModelsCall(accessToken);
|
||||||
|
} else if (userRole == "App Owner") {
|
||||||
await userSpendLogsCall(
|
await userSpendLogsCall(
|
||||||
accessToken,
|
accessToken,
|
||||||
token,
|
token,
|
||||||
|
@ -172,21 +197,34 @@ const UsagePage: React.FC<UsagePageProps> = ({
|
||||||
startTime,
|
startTime,
|
||||||
endTime
|
endTime
|
||||||
).then(async (response) => {
|
).then(async (response) => {
|
||||||
|
console.log("result from spend logs call", response);
|
||||||
|
if ("daily_spend" in response) {
|
||||||
|
// this is from clickhouse analytics
|
||||||
|
//
|
||||||
|
let daily_spend = response["daily_spend"];
|
||||||
|
console.log("daily spend", daily_spend);
|
||||||
|
setKeySpendData(daily_spend);
|
||||||
|
let topApiKeys = response.top_api_keys;
|
||||||
|
setTopKeys(topApiKeys);
|
||||||
|
} else {
|
||||||
const topKeysResponse = await keyInfoCall(
|
const topKeysResponse = await keyInfoCall(
|
||||||
accessToken,
|
accessToken,
|
||||||
getTopKeys(response)
|
getTopKeys(response)
|
||||||
);
|
);
|
||||||
const filtered_keys = topKeysResponse["info"].map((k: any) => ({
|
const filtered_keys = topKeysResponse["info"].map((k: any) => ({
|
||||||
key: (k["key_name"] || k["key_alias"] || k["token"]).substring(
|
key: (
|
||||||
0,
|
k["key_name"] ||
|
||||||
7
|
k["key_alias"] ||
|
||||||
),
|
k["token"]
|
||||||
|
).substring(0, 7),
|
||||||
spend: k["spend"],
|
spend: k["spend"],
|
||||||
}));
|
}));
|
||||||
setTopKeys(filtered_keys);
|
setTopKeys(filtered_keys);
|
||||||
setTopUsers(getTopUsers(response));
|
setTopUsers(getTopUsers(response));
|
||||||
setKeySpendData(response);
|
setKeySpendData(response);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("There was an error fetching the data", error);
|
console.error("There was an error fetching the data", error);
|
||||||
// Optionally, update your UI to reflect the error state here as well
|
// Optionally, update your UI to reflect the error state here as well
|
||||||
|
@ -210,7 +248,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
|
||||||
valueFormatter={valueFormatter}
|
valueFormatter={valueFormatter}
|
||||||
yAxisWidth={100}
|
yAxisWidth={100}
|
||||||
tickGap={5}
|
tickGap={5}
|
||||||
customTooltip={customTooltip}
|
// customTooltip={customTooltip}
|
||||||
/>
|
/>
|
||||||
</Card>
|
</Card>
|
||||||
</Col>
|
</Col>
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue