diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml
index d1d06eb58..63c50953a 100644
--- a/proxy_server_config.yaml
+++ b/proxy_server_config.yaml
@@ -48,7 +48,13 @@ model_list:
       model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
-
+  - model_name: fake-openai-endpoint-2
+    litellm_params:
+      model: openai/my-fake-model
+      api_key: my-fake-key
+      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
+      stream_timeout: 0.001
+      rpm: 1
 litellm_settings:
   drop_params: True
   # max_budget: 100
@@ -58,6 +64,13 @@ litellm_settings:
   telemetry: False
   context_window_fallbacks: [{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}]
 
+router_settings:
+  routing_strategy: usage-based-routing-v2
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT
+  enable_pre_call_checks: true
+
 general_settings:
   master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys
   store_model_in_db: True
diff --git a/tests/test_openai_endpoints.py b/tests/test_openai_endpoints.py
index 2137b2665..6fbdb7be5 100644
--- a/tests/test_openai_endpoints.py
+++ b/tests/test_openai_endpoints.py
@@ -18,7 +18,12 @@ async def generate_key(session):
     url = "http://0.0.0.0:4000/key/generate"
     headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
     data = {
-        "models": ["gpt-4", "text-embedding-ada-002", "dall-e-2"],
+        "models": [
+            "gpt-4",
+            "text-embedding-ada-002",
+            "dall-e-2",
+            "fake-openai-endpoint-2",
+        ],
         "duration": None,
     }
 
@@ -63,14 +68,14 @@ async def new_user(session):
     return await response.json()
 
 
-async def chat_completion(session, key):
+async def chat_completion(session, key, model="gpt-4"):
     url = "http://0.0.0.0:4000/chat/completions"
     headers = {
         "Authorization": f"Bearer {key}",
         "Content-Type": "application/json",
     }
     data = {
-        "model": "gpt-4",
+        "model": model,
         "messages": [
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": "Hello!"},
@@ -189,6 +194,31 @@ async def test_chat_completion():
         await chat_completion(session=session, key=key_2)
 
 
+@pytest.mark.skip(reason="Local test. Proxy not concurrency safe yet. WIP.")
+@pytest.mark.asyncio
+async def test_chat_completion_ratelimit():
+    """
+    - call model with rpm 1
+    - make 2 parallel calls
+    - make sure 1 fails
+    """
+    async with aiohttp.ClientSession() as session:
+        # key_gen = await generate_key(session=session)
+        key = "sk-1234"
+        tasks = []
+        tasks.append(
+            chat_completion(session=session, key=key, model="fake-openai-endpoint-2")
+        )
+        tasks.append(
+            chat_completion(session=session, key=key, model="fake-openai-endpoint-2")
+        )
+        try:
+            await asyncio.gather(*tasks)
+            pytest.fail("Expected at least 1 call to fail")
+        except Exception:
+            pass
+
+
 @pytest.mark.asyncio
 async def test_chat_completion_old_key():
     """
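
For context on the new `router_settings` block: in YAML, `os.environ/REDIS_HOST` is the proxy's convention for reading a value from the environment, and the same settings map onto keyword arguments of `litellm.Router`. Below is a minimal sketch of the equivalent in-code setup, assuming the `Router` constructor accepts these kwargs in the litellm version this PR targets; the model entry simply mirrors the `fake-openai-endpoint-2` deployment added above, and nothing here is taken from the PR beyond those YAML values.

```python
# Minimal sketch: in-code equivalent of the router_settings / model_list YAML above.
# Assumption: litellm.Router accepts these keyword arguments in the targeted version.
import os

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "fake-openai-endpoint-2",
            "litellm_params": {
                "model": "openai/my-fake-model",
                "api_key": "my-fake-key",
                "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
                "stream_timeout": 0.001,
                "rpm": 1,  # at most 1 request per minute to this deployment
            },
        }
    ],
    routing_strategy="usage-based-routing-v2",  # usage tracked in Redis across workers
    redis_host=os.environ["REDIS_HOST"],
    redis_password=os.environ["REDIS_PASSWORD"],
    redis_port=int(os.environ["REDIS_PORT"]),
    enable_pre_call_checks=True,  # reject calls that would exceed limits before dispatch
)
```

Redis is what makes the rpm limit enforceable across multiple proxy workers: each worker reads and increments the shared usage counters, rather than keeping per-process state.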
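One note on `test_chat_completion_ratelimit`: the broad `except Exception` does not swallow the `pytest.fail(...)` inside the `try`, because pytest's outcome exceptions derive from `BaseException`, not `Exception`. That said, the test only checks that at least one call raised. A hedged alternative sketch (not part of this PR) using `asyncio.gather(..., return_exceptions=True)` would let the test count failures explicitly; `count_ratelimit_failures` is a hypothetical helper, and it reuses the `chat_completion` helper defined in this test file, assumed to raise on a non-200 response.

```python
# Alternative sketch (not in this PR): count how many parallel calls fail,
# instead of relying on try/except around asyncio.gather.
import asyncio

import aiohttp


async def count_ratelimit_failures(n_calls: int = 2) -> int:
    """Fire n_calls parallel requests at the rpm=1 model; return the failure count."""
    async with aiohttp.ClientSession() as session:
        tasks = [
            chat_completion(
                session=session, key="sk-1234", model="fake-openai-endpoint-2"
            )
            for _ in range(n_calls)
        ]
        # return_exceptions=True puts raised exceptions into the results list
        # in place of return values, so one failure doesn't cancel the rest.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        return sum(isinstance(r, Exception) for r in results)
```

With `rpm: 1` and two parallel calls, one would expect this to return exactly 1 once the proxy enforces the limit, which is a sharper assertion than "gather raised something".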