import sys, os, asyncio, time, random
import copy
import logging
import traceback
from datetime import datetime, timezone, timedelta

from dotenv import load_dotenv

load_dotenv()

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import pytest

import litellm
from litellm import Router
from litellm._logging import verbose_router_logger
from litellm.caching.caching import DualCache, RedisCache
from litellm.router_strategy.budget_limiter import RouterBudgetLimiting
from litellm.types.router import RoutingStrategy
from litellm.types.utils import GenericBudgetConfigType, BudgetConfig

verbose_router_logger.setLevel(logging.DEBUG)


def cleanup_redis():
    """Cleanup Redis cache before each test"""
    try:
        import redis

        print("cleaning up redis..")
        redis_client = redis.Redis(
            host=os.getenv("REDIS_HOST"),
            port=int(os.getenv("REDIS_PORT")),
            password=os.getenv("REDIS_PASSWORD"),
        )
        # Delete all provider, deployment, and tag spend keys
        for key in redis_client.scan_iter("provider_spend:*"):
            print("deleting key", key)
            redis_client.delete(key)
        for key in redis_client.scan_iter("deployment_spend:*"):
            print("deleting key", key)
            redis_client.delete(key)
        for key in redis_client.scan_iter("tag_spend:*"):
            print("deleting key", key)
            redis_client.delete(key)
    except Exception as e:
        print(f"Error cleaning up Redis: {str(e)}")
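
# The limiter namespaces its Redis keys by budget type; the cleanup above
# covers all three prefixes. The exact key layout, e.g.
# "provider_spend:openai:1d" (<prefix>:<entity>:<time_period>), is inferred
# from the keys asserted later in this file.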


@pytest.mark.flaky(retries=6, delay=2)
@pytest.mark.asyncio
async def test_provider_budgets_e2e_test():
    """
    Expected behavior:
    - First request forced to OpenAI
    - Hit OpenAI budget limit
    - Next 3 requests all go to Azure
    """
    cleanup_redis()
    # Modify for test
    provider_budget_config: GenericBudgetConfigType = {
        "openai": BudgetConfig(time_period="1d", budget_limit=0.000000000001),
        "azure": BudgetConfig(time_period="1d", budget_limit=100),
    }

    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
                "model_info": {"id": "azure-model-id"},
            },
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {
                    "model": "openai/gpt-4o-mini",
                },
                "model_info": {"id": "openai-model-id"},
            },
        ],
        provider_budget_config=provider_budget_config,
        redis_host=os.getenv("REDIS_HOST"),
        redis_port=int(os.getenv("REDIS_PORT")),
        redis_password=os.getenv("REDIS_PASSWORD"),
    )

    # Force the first request to the OpenAI deployment by calling it directly
    response = await router.acompletion(
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        model="openai/gpt-4o-mini",
    )
    print(response)

    await asyncio.sleep(2.5)

    for _ in range(3):
        response = await router.acompletion(
            messages=[{"role": "user", "content": "Hello, how are you?"}],
            model="gpt-3.5-turbo",
        )
        print(response)
        print("response.hidden_params", response._hidden_params)
        await asyncio.sleep(1)
        assert response._hidden_params.get("custom_llm_provider") == "azure"
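
# Note on BudgetConfig fields: this file constructs it both as
# BudgetConfig(time_period=..., budget_limit=...) and as
# BudgetConfig(budget_duration=..., max_budget=...). Both spellings are used
# interchangeably in these tests, which suggests (but does not by itself
# confirm) that the first pair aliases the second.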


@pytest.mark.asyncio
async def test_provider_budgets_e2e_test_expect_to_fail():
    """
    Expected behavior:
    - first request passes, all subsequent requests fail
    """
    cleanup_redis()

    # Note: we intentionally use a dictionary with string keys for budget_limit
    # and time_period. We want to test that the router can handle the type
    # conversion, since the proxy config yaml passes these values as a dictionary.
    provider_budget_config = {
        "anthropic": {
            "budget_limit": 0.000000000001,
            "time_period": "1d",
        }
    }

    router = Router(
        model_list=[
            {
                "model_name": "anthropic/*",  # wildcard route for all anthropic models
                "litellm_params": {
                    "model": "anthropic/*",
                },
            },
        ],
        redis_host=os.getenv("REDIS_HOST"),
        redis_port=int(os.getenv("REDIS_PORT")),
        redis_password=os.getenv("REDIS_PASSWORD"),
        provider_budget_config=provider_budget_config,
    )

    response = await router.acompletion(
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        model="anthropic/claude-3-5-sonnet-20240620",
    )
    print(response)

    await asyncio.sleep(2.5)

    for _ in range(3):
        with pytest.raises(Exception) as exc_info:
            response = await router.acompletion(
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                model="anthropic/claude-3-5-sonnet-20240620",
            )
            print(response)
            print("response.hidden_params", response._hidden_params)

        await asyncio.sleep(0.5)
        # Verify the error is related to budget exceeded
        assert "Exceeded budget for provider" in str(exc_info.value)
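
# For reference, a typed equivalent of the raw-dict config above would be
# (a sketch, assuming the router's type conversion maps the string keys onto
# BudgetConfig fields):
#
#     provider_budget_config = {
#         "anthropic": BudgetConfig(time_period="1d", budget_limit=0.000000000001)
#     }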


@pytest.mark.asyncio
async def test_get_llm_provider_for_deployment():
    """
    Test the _get_llm_provider_for_deployment helper method
    """
    cleanup_redis()
    provider_budget = RouterBudgetLimiting(
        dual_cache=DualCache(), provider_budget_config={}
    )

    # Test OpenAI deployment
    openai_deployment = {"litellm_params": {"model": "openai/gpt-4"}}
    assert (
        provider_budget._get_llm_provider_for_deployment(openai_deployment) == "openai"
    )

    # Test Azure deployment
    azure_deployment = {
        "litellm_params": {
            "model": "azure/gpt-4",
            "api_key": "test",
            "api_base": "test",
        }
    }
    assert provider_budget._get_llm_provider_for_deployment(azure_deployment) == "azure"

    # Should not raise an error for an unknown deployment
    unknown_deployment = {}
    assert provider_budget._get_llm_provider_for_deployment(unknown_deployment) is None
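
# _get_llm_provider_for_deployment presumably derives the provider from the
# deployment's "model" prefix (e.g. "openai/gpt-4" -> "openai"); that is an
# inference from these assertions, not something this file shows directly.
# The contract the test pins down is that an empty deployment returns None
# rather than raising.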


@pytest.mark.asyncio
async def test_get_budget_config_for_provider():
    """
    Test the _get_budget_config_for_provider helper method
    """
    cleanup_redis()
    config = {
        "openai": BudgetConfig(budget_duration="1d", max_budget=100),
        "anthropic": BudgetConfig(budget_duration="7d", max_budget=500),
    }

    provider_budget = RouterBudgetLimiting(
        dual_cache=DualCache(), provider_budget_config=config
    )

    # Test existing providers
    openai_config = provider_budget._get_budget_config_for_provider("openai")
    assert openai_config is not None
    assert openai_config.budget_duration == "1d"
    assert openai_config.max_budget == 100

    anthropic_config = provider_budget._get_budget_config_for_provider("anthropic")
    assert anthropic_config is not None
    assert anthropic_config.budget_duration == "7d"
    assert anthropic_config.max_budget == 500

    # Test non-existent provider
    assert provider_budget._get_budget_config_for_provider("unknown") is None
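
# The lookup is a plain provider-name match against the configured dict:
# known providers return their BudgetConfig, unknown providers return None,
# so callers can treat "no config" as "no budget enforcement".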


@pytest.mark.asyncio
async def test_prometheus_metric_tracking():
    """
    Test that the Prometheus metric for provider budget is tracked correctly
    """
    cleanup_redis()
    from unittest.mock import MagicMock
    from litellm.integrations.prometheus import PrometheusLogger

    # Create a mock PrometheusLogger
    mock_prometheus = MagicMock(spec=PrometheusLogger)

    # Setup provider budget limiting
    provider_budget = RouterBudgetLimiting(
        dual_cache=DualCache(),
        provider_budget_config={
            "openai": BudgetConfig(budget_duration="1d", max_budget=100)
        },
    )

    litellm._async_success_callback = [mock_prometheus]

    provider_budget_config: GenericBudgetConfigType = {
        "openai": BudgetConfig(budget_duration="1d", max_budget=0.000000000001),
        "azure": BudgetConfig(budget_duration="1d", max_budget=100),
    }

    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
                "model_info": {"id": "azure-model-id"},
            },
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {
                    "model": "openai/gpt-4o-mini",
                },
                "model_info": {"id": "openai-model-id"},
            },
        ],
        provider_budget_config=provider_budget_config,
        redis_host=os.getenv("REDIS_HOST"),
        redis_port=int(os.getenv("REDIS_PORT")),
        redis_password=os.getenv("REDIS_PASSWORD"),
    )

    try:
        response = await router.acompletion(
            messages=[{"role": "user", "content": "Hello, how are you?"}],
            model="openai/gpt-4o-mini",
            mock_response="hi",
        )
        print(response)
    except Exception as e:
        print("error", e)

    await asyncio.sleep(2.5)

    # Verify the mock was called correctly
    mock_prometheus.track_provider_remaining_budget.assert_called()
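
# The assertion above only checks that track_provider_remaining_budget was
# invoked; the actual remaining-budget value is not verified here. The call
# presumably happens on the router's async success-callback path once spend
# is recorded (an assumption, based on litellm._async_success_callback being
# patched above).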


@pytest.mark.asyncio
async def test_handle_new_budget_window():
    """
    Test the _handle_new_budget_window helper method
    """
    cleanup_redis()
    provider_budget = RouterBudgetLimiting(
        dual_cache=DualCache(), provider_budget_config={}
    )

    spend_key = "provider_spend:openai:7d"
    start_time_key = "provider_budget_start_time:openai"
    current_time = 1000.0
    response_cost = 0.5
    ttl_seconds = 86400  # 1 day

    # Test handling a new budget window
    new_start_time = await provider_budget._handle_new_budget_window(
        spend_key=spend_key,
        start_time_key=start_time_key,
        current_time=current_time,
        response_cost=response_cost,
        ttl_seconds=ttl_seconds,
    )

    assert new_start_time == current_time

    # Verify the spend was set correctly
    spend = await provider_budget.dual_cache.async_get_cache(spend_key)
    print("spend in cache for key", spend_key, "is", spend)
    assert float(spend) == response_cost

    # Verify the start time was set correctly
    start_time = await provider_budget.dual_cache.async_get_cache(start_time_key)
    print("start time in cache for key", start_time_key, "is", start_time)
    assert float(start_time) == current_time
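
# A new budget window therefore amounts to two writes, both carrying the
# window's TTL: spend_key <- response_cost (set, not incremented, since the
# window is fresh) and start_time_key <- current_time, which
# _handle_new_budget_window also returns.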


@pytest.mark.asyncio
async def test_get_or_set_budget_start_time():
    """
    Test the _get_or_set_budget_start_time helper method

    scenario 1: no existing start time in cache, should return current time
    scenario 2: existing start time in cache, should return existing start time
    """
    cleanup_redis()
    provider_budget = RouterBudgetLimiting(
        dual_cache=DualCache(), provider_budget_config={}
    )

    start_time_key = "test_start_time"
    current_time = 1000.0
    ttl_seconds = 86400  # 1 day

    # When there is no existing start time, we should set it to the current time
    start_time = await provider_budget._get_or_set_budget_start_time(
        start_time_key=start_time_key,
        current_time=current_time,
        ttl_seconds=ttl_seconds,
    )
    print("budget start time when no existing start time is in cache", start_time)
    assert start_time == current_time

    # When there is an existing start time, we should return it even if the current time is later
    new_current_time = 2000.0
    existing_start_time = await provider_budget._get_or_set_budget_start_time(
        start_time_key=start_time_key,
        current_time=new_current_time,
        ttl_seconds=ttl_seconds,
    )
    print(
        "budget start time when existing start time is in cache, but current time is later",
        existing_start_time,
    )
    assert existing_start_time == current_time  # Should return the original start time


@pytest.mark.asyncio
async def test_increment_spend_in_current_window():
    """
    Test the _increment_spend_in_current_window helper method

    Expected behavior:
    - Increment the spend in the in-memory cache
    - Queue the increment operation to Redis
    """
    cleanup_redis()
    provider_budget = RouterBudgetLimiting(
        dual_cache=DualCache(), provider_budget_config={}
    )

    spend_key = "provider_spend:openai:1d"
    response_cost = 0.5
    ttl = 86400  # 1 day

    # Set initial spend
    await provider_budget.dual_cache.async_set_cache(key=spend_key, value=1.0, ttl=ttl)

    # Test incrementing spend
    await provider_budget._increment_spend_in_current_window(
        spend_key=spend_key,
        response_cost=response_cost,
        ttl=ttl,
    )

    # Verify the spend was incremented correctly in memory
    spend = await provider_budget.dual_cache.async_get_cache(spend_key)
    assert float(spend) == 1.5

    # Verify the increment operation was queued for Redis
    print(
        "redis_increment_operation_queue",
        provider_budget.redis_increment_operation_queue,
    )
    assert len(provider_budget.redis_increment_operation_queue) == 1
    queued_op = provider_budget.redis_increment_operation_queue[0]
    assert queued_op["key"] == spend_key
    assert queued_op["increment_value"] == response_cost
    assert queued_op["ttl"] == ttl
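
# Increments are applied to the in-memory cache immediately and only queued
# for Redis (redis_increment_operation_queue above); a separate sync step
# presumably flushes that queue, as exercised by
# test_sync_in_memory_spend_with_redis below.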


@pytest.mark.asyncio
async def test_sync_in_memory_spend_with_redis():
    """
    Test the _sync_in_memory_spend_with_redis helper method

    Expected behavior:
    - Push all provider spend increments to Redis
    - Fetch all current provider spend from Redis to update the in-memory cache
    """
    cleanup_redis()
    provider_budget_config = {
        "openai": BudgetConfig(time_period="1d", budget_limit=100),
        "anthropic": BudgetConfig(time_period="1d", budget_limit=200),
    }

    provider_budget = RouterBudgetLimiting(
        dual_cache=DualCache(
            redis_cache=RedisCache(
                host=os.getenv("REDIS_HOST"),
                port=int(os.getenv("REDIS_PORT")),
                password=os.getenv("REDIS_PASSWORD"),
            )
        ),
        provider_budget_config=provider_budget_config,
    )

    # Set some values in Redis
    spend_key_openai = "provider_spend:openai:1d"
    spend_key_anthropic = "provider_spend:anthropic:1d"

    await provider_budget.dual_cache.redis_cache.async_set_cache(
        key=spend_key_openai, value=50.0
    )
    await provider_budget.dual_cache.redis_cache.async_set_cache(
        key=spend_key_anthropic, value=75.0
    )

    # Test syncing with Redis
    await provider_budget._sync_in_memory_spend_with_redis()

    # Verify the in-memory cache was updated
    openai_spend = await provider_budget.dual_cache.in_memory_cache.async_get_cache(
        spend_key_openai
    )
    anthropic_spend = await provider_budget.dual_cache.in_memory_cache.async_get_cache(
        spend_key_anthropic
    )

    assert float(openai_spend) == 50.0
    assert float(anthropic_spend) == 75.0


@pytest.mark.asyncio
async def test_get_current_provider_spend():
    """
    Test the _get_current_provider_spend helper method

    Scenarios:
    1. Provider with no budget config returns None
    2. Provider with budget config but no spend returns 0.0
    3. Provider with budget config and spend returns the correct value
    """
    cleanup_redis()
    provider_budget = RouterBudgetLimiting(
        dual_cache=DualCache(),
        provider_budget_config={
            "openai": BudgetConfig(time_period="1d", budget_limit=100),
        },
    )

    # Test provider with no budget config
    spend = await provider_budget._get_current_provider_spend("anthropic")
    assert spend is None

    # Test provider with budget config but no spend
    spend = await provider_budget._get_current_provider_spend("openai")
    assert spend == 0.0

    # Test provider with budget config and spend
    spend_key = "provider_spend:openai:1d"
    await provider_budget.dual_cache.async_set_cache(key=spend_key, value=50.5)

    spend = await provider_budget._get_current_provider_spend("openai")
    assert spend == 50.5


@pytest.mark.flaky(retries=6, delay=2)
@pytest.mark.asyncio
async def test_get_current_provider_budget_reset_at():
    """
    Test the _get_current_provider_budget_reset_at helper method

    Scenarios:
    1. Provider with no budget config returns None
    2. Provider with a 1d budget duration returns an ISO timestamp ~24 hours out
    3. Provider with a 1h budget duration returns an ISO timestamp ~1 hour out
    """
    cleanup_redis()
    provider_budget = RouterBudgetLimiting(
        dual_cache=DualCache(
            redis_cache=RedisCache(
                host=os.getenv("REDIS_HOST"),
                port=int(os.getenv("REDIS_PORT")),
                password=os.getenv("REDIS_PASSWORD"),
            )
        ),
        provider_budget_config={
            "openai": BudgetConfig(budget_duration="1d", max_budget=100),
            "vertex_ai": BudgetConfig(budget_duration="1h", max_budget=100),
        },
    )

    await asyncio.sleep(2)

    # Test provider with no budget config
    reset_at = await provider_budget._get_current_provider_budget_reset_at("anthropic")
    assert reset_at is None

    # Test provider with a 1d budget duration
    reset_at = await provider_budget._get_current_provider_budget_reset_at("openai")
    assert reset_at is not None
    reset_time = datetime.fromisoformat(reset_at.replace("Z", "+00:00"))
    expected_time = datetime.now(timezone.utc) + timedelta(seconds=(24 * 60 * 60))
    time_difference = abs((reset_time - expected_time).total_seconds())
    assert time_difference < 5

    # Test provider with a 1h budget duration
    reset_at = await provider_budget._get_current_provider_budget_reset_at("vertex_ai")
    assert reset_at is not None

    # Verify the timestamp format and approximate time
    reset_time = datetime.fromisoformat(reset_at.replace("Z", "+00:00"))
    expected_time = datetime.now(timezone.utc) + timedelta(seconds=3600)

    # Allow for small time differences (within 5 seconds)
    time_difference = abs((reset_time - expected_time).total_seconds())
    assert time_difference < 5
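
# reset_at is evidently derived from the Redis TTL on the provider's spend
# key and rendered as an ISO-8601 UTC timestamp; the 5-second tolerance above
# absorbs the sleep and Redis round-trip latency.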


@pytest.mark.asyncio
async def test_deployment_budget_limits_e2e_test():
    """
    Expected behavior:
    - First request forced to openai/gpt-4o
    - Hit budget limit for openai/gpt-4o
    - Next 3 requests all go to openai/gpt-4o-mini
    """
    litellm.set_verbose = True
    cleanup_redis()
    # Modify for test

    router = Router(
        model_list=[
            {
                "model_name": "gpt-4o",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call
                    "model": "openai/gpt-4o",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                    "max_budget": 0.000000000001,
                    "budget_duration": "1d",
                },
                "model_info": {"id": "openai-gpt-4o"},
            },
            {
                "model_name": "gpt-4o",  # openai model name
                "litellm_params": {
                    "model": "openai/gpt-4o-mini",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                    "max_budget": 10,
                    "budget_duration": "20d",
                },
                "model_info": {"id": "openai-gpt-4o-mini"},
            },
        ],
    )

    # Force the first request to the gpt-4o deployment by calling its model id
    response = await router.acompletion(
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        model="openai-gpt-4o",
    )
    print(response)

    await asyncio.sleep(2.5)

    for _ in range(3):
        response = await router.acompletion(
            messages=[{"role": "user", "content": "Hello, how are you?"}],
            model="gpt-4o",
        )
        print(response)
        await asyncio.sleep(1)

    print("response.hidden_params", response._hidden_params)
    assert response._hidden_params.get("model_id") == "openai-gpt-4o-mini"


@pytest.mark.asyncio
async def test_deployment_budgets_e2e_test_expect_to_fail():
    """
    Expected behavior:
    - first request passes, all subsequent requests fail
    """
    cleanup_redis()

    router = Router(
        model_list=[
            {
                "model_name": "openai/gpt-4o-mini",  # openai model name
                "litellm_params": {
                    "model": "openai/gpt-4o-mini",
                    "max_budget": 0.000000000001,
                    "budget_duration": "1d",
                },
            },
        ],
        redis_host=os.getenv("REDIS_HOST"),
        redis_port=int(os.getenv("REDIS_PORT")),
        redis_password=os.getenv("REDIS_PASSWORD"),
    )

    response = await router.acompletion(
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        model="openai/gpt-4o-mini",
    )
    print(response)

    await asyncio.sleep(2.5)

    for _ in range(3):
        with pytest.raises(Exception) as exc_info:
            response = await router.acompletion(
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                model="openai/gpt-4o-mini",
            )
            print(response)
            print("response.hidden_params", response._hidden_params)

        await asyncio.sleep(0.5)
        # Verify the error is related to budget exceeded
        assert "Exceeded budget for deployment" in str(exc_info.value)


@pytest.mark.asyncio
async def test_tag_budgets_e2e_test_expect_to_fail():
    """
    Expected behavior:
    - first request passes; subsequent requests with the same tag fail,
      while requests with a different, under-budget tag still pass
    """
    cleanup_redis()
    TAG_NAME = "product:chat-bot"
    TAG_NAME_2 = "product:chat-bot-2"
    litellm.tag_budget_config = {
        TAG_NAME: BudgetConfig(max_budget=0.000000000001, budget_duration="1d"),
        TAG_NAME_2: BudgetConfig(max_budget=100, budget_duration="1d"),
    }

    router = Router(
        model_list=[
            {
                "model_name": "openai/gpt-4o-mini",  # openai model name
                "litellm_params": {
                    "model": "openai/gpt-4o-mini",
                },
            },
        ],
        redis_host=os.getenv("REDIS_HOST"),
        redis_port=int(os.getenv("REDIS_PORT")),
        redis_password=os.getenv("REDIS_PASSWORD"),
    )

    response = await router.acompletion(
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        model="openai/gpt-4o-mini",
        metadata={"tags": [TAG_NAME]},
    )
    print(response)

    await asyncio.sleep(2.5)

    for _ in range(3):
        with pytest.raises(Exception) as exc_info:
            response = await router.acompletion(
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                model="openai/gpt-4o-mini",
                metadata={"tags": [TAG_NAME]},
            )
            print(response)
            print("response.hidden_params", response._hidden_params)

        await asyncio.sleep(0.5)
        # Verify the error is related to budget exceeded
        assert f"Exceeded budget for tag='{TAG_NAME}'" in str(exc_info.value)

    # Test with tag-2; expect these requests to pass
    for _ in range(2):
        response = await router.acompletion(
            messages=[{"role": "user", "content": "Hello, how are you?"}],
            model="openai/gpt-4o-mini",
            metadata={"tags": [TAG_NAME_2]},
        )
        print(response)
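
# Tag budgets are configured globally via litellm.tag_budget_config and
# matched against each request's metadata["tags"]; only the over-budget tag
# is blocked, while TAG_NAME_2 (max_budget=100) continues to route normally.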