# What is this?
## Unit tests for `dynamic_rate_limiter.py`
import asyncio
import os
import random
import sys
import time
import traceback
from datetime import datetime
from typing import Tuple

from dotenv import load_dotenv

load_dotenv()
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest

import litellm
from litellm import DualCache, Router
from litellm.proxy.hooks.dynamic_rate_limiter import (
    _PROXY_DynamicRateLimitHandler as DynamicRateLimitHandler,
)
"""
|
|
Basic test cases:
|
|
|
|
- If 1 'active' project => give all tpm
|
|
- If 2 'active' projects => divide tpm in 2
|
|
"""


@pytest.fixture
def dynamic_rate_limit_handler() -> DynamicRateLimitHandler:
    internal_cache = DualCache()
    return DynamicRateLimitHandler(internal_usage_cache=internal_cache)


@pytest.mark.parametrize("num_projects", [1, 2, 100])
@pytest.mark.asyncio
async def test_available_tpm(num_projects, dynamic_rate_limit_handler):
    model = "my-fake-model"
    ## SET CACHE W/ ACTIVE PROJECTS
    await dynamic_rate_limit_handler.internal_usage_cache.async_increment_cache(
        model=model, value=num_projects
    )

    model_tpm = 100
    llm_router = Router(
        model_list=[
            {
                "model_name": model,
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "my-key",
                    "api_base": "my-base",
                    "tpm": model_tpm,
                },
            }
        ]
    )
    dynamic_rate_limit_handler.update_variables(llm_router=llm_router)

    ## CHECK AVAILABLE TPM PER PROJECT

    availability = await dynamic_rate_limit_handler.check_available_tpm(model=model)

    expected_availability = int(model_tpm / num_projects)
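
    # For reference, with model_tpm == 100 the parametrized cases work out to:
    #   num_projects = 1   -> expected_availability = 100
    #   num_projects = 2   -> expected_availability = 50
    #   num_projects = 100 -> expected_availability = 1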
    assert availability == expected_availability