litellm/tests/router_unit_tests/test_router_endpoints.py
Krish Dholakia 21156ff5d0
LiteLLM Minor Fixes & Improvements (11/27/2024) (#6943)
* fix(http_parsing_utils.py): remove `ast.literal_eval()` from http utils

Security fix - https://huntr.com/bounties/96a32812-213c-4819-ba4e-36143d35e95b?token=bf414bbd77f8b346556e
64ab2dd9301ea44339910877ea50401c76f977e36cdd78272f5fb4ca852a88a7e832828aae1192df98680544ee24aa98f3cf6980d8
bab641a66b7ccbc02c0e7d4ddba2db4dbe7318889dc0098d8db2d639f345f574159814627bb084563bad472e2f990f825bff0878a9
e281e72c88b4bc5884d637d186c0d67c9987c57c3f0caf395aff07b89ad2b7220d1dd7d1b427fd2260b5f01090efce5250f8b56ea2
c0ec19916c24b23825d85ce119911275944c840a1340d69e23ca6a462da610

* fix(converse/transformation.py): support bedrock apac cross region inference

Fixes https://github.com/BerriAI/litellm/issues/6905

* fix(user_api_key_auth.py): add auth check for websocket endpoint

Fixes https://github.com/BerriAI/litellm/issues/6926

* fix(user_api_key_auth.py): use `model` from query param

* fix: fix linting error

* test: run flaky tests first
2024-11-28 00:32:46 +05:30

280 lines
7.8 KiB
Python

import sys
import os
import traceback
from dotenv import load_dotenv
from fastapi import Request
from datetime import datetime
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from litellm import Router, CustomLogger
# Get the current directory of the file being run
pwd = os.path.dirname(os.path.realpath(__file__))
print(pwd)
file_path = os.path.join(pwd, "gettysburg.wav")
audio_file = open(file_path, "rb")
from pathlib import Path
import litellm
import pytest
import asyncio
@pytest.fixture
def model_list():
return [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
{
"model_name": "gpt-4o",
"litellm_params": {
"model": "gpt-4o",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
{
"model_name": "dall-e-3",
"litellm_params": {
"model": "dall-e-3",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
{
"model_name": "cohere-rerank",
"litellm_params": {
"model": "cohere/rerank-english-v3.0",
"api_key": os.getenv("COHERE_API_KEY"),
},
},
{
"model_name": "claude-3-5-sonnet-20240620",
"litellm_params": {
"model": "gpt-3.5-turbo",
"mock_response": "hi this is macintosh.",
},
},
]
# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):
def __init__(self):
self.openai_client = None
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
try:
# init logging config
print("logging a transcript kwargs: ", kwargs)
print("openai client=", kwargs.get("client"))
self.openai_client = kwargs.get("client")
except Exception:
pass
proxy_handler_instance = MyCustomHandler()
# Set litellm.callbacks = [proxy_handler_instance] on the proxy
# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy
@pytest.mark.asyncio
@pytest.mark.flaky(retries=6, delay=10)
async def test_transcription_on_router():
litellm.set_verbose = True
litellm.callbacks = [proxy_handler_instance]
print("\n Testing async transcription on router\n")
try:
model_list = [
{
"model_name": "whisper",
"litellm_params": {
"model": "whisper-1",
},
},
{
"model_name": "whisper",
"litellm_params": {
"model": "azure/azure-whisper",
"api_base": "https://my-endpoint-europe-berri-992.openai.azure.com/",
"api_key": os.getenv("AZURE_EUROPE_API_KEY"),
"api_version": "2024-02-15-preview",
},
},
]
router = Router(model_list=model_list)
router_level_clients = []
for deployment in router.model_list:
_deployment_openai_client = router._get_client(
deployment=deployment,
kwargs={"model": "whisper-1"},
client_type="async",
)
router_level_clients.append(str(_deployment_openai_client))
## test 1: user facing function
response = await router.atranscription(
model="whisper",
file=audio_file,
)
## test 2: underlying function
response = await router._atranscription(
model="whisper",
file=audio_file,
)
print(response)
# PROD Test
# Ensure we ONLY use OpenAI/Azure client initialized on the router level
await asyncio.sleep(5)
print("OpenAI Client used= ", proxy_handler_instance.openai_client)
print("all router level clients= ", router_level_clients)
assert proxy_handler_instance.openai_client in router_level_clients
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize("mode", ["iterator"]) # "file",
@pytest.mark.asyncio
async def test_audio_speech_router(mode):
from litellm import Router
client = Router(
model_list=[
{
"model_name": "tts",
"litellm_params": {
"model": "openai/tts-1",
},
},
]
)
response = await client.aspeech(
model="tts",
voice="alloy",
input="the quick brown fox jumped over the lazy dogs",
api_base=None,
api_key=None,
organization=None,
project=None,
max_retries=1,
timeout=600,
client=None,
optional_params={},
)
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
@pytest.mark.asyncio()
async def test_rerank_endpoint(model_list):
from litellm.types.utils import RerankResponse
router = Router(model_list=model_list)
## Test 1: user facing function
response = await router.arerank(
model="cohere-rerank",
query="hello",
documents=["hello", "world"],
top_n=3,
)
## Test 2: underlying function
response = await router._arerank(
model="cohere-rerank",
query="hello",
documents=["hello", "world"],
top_n=3,
)
print("async re rank response: ", response)
assert response.id is not None
assert response.results is not None
RerankResponse.model_validate(response)
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_aaaaatext_completion_endpoint(model_list, sync_mode):
router = Router(model_list=model_list)
if sync_mode:
response = router.text_completion(
model="gpt-3.5-turbo",
prompt="Hello, how are you?",
mock_response="I'm fine, thank you!",
)
else:
## Test 1: user facing function
response = await router.atext_completion(
model="gpt-3.5-turbo",
prompt="Hello, how are you?",
mock_response="I'm fine, thank you!",
)
## Test 2: underlying function
response_2 = await router._atext_completion(
model="gpt-3.5-turbo",
prompt="Hello, how are you?",
mock_response="I'm fine, thank you!",
)
assert response_2.choices[0].text == "I'm fine, thank you!"
assert response.choices[0].text == "I'm fine, thank you!"
@pytest.mark.asyncio
async def test_anthropic_router_completion_e2e(model_list):
from litellm.adapters.anthropic_adapter import anthropic_adapter
from litellm.types.llms.anthropic import AnthropicResponse
litellm.set_verbose = True
litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]
router = Router(model_list=model_list)
messages = [{"role": "user", "content": "Hey, how's it going?"}]
## Test 1: user facing function
response = await router.aadapter_completion(
model="claude-3-5-sonnet-20240620",
messages=messages,
adapter_id="anthropic",
mock_response="This is a fake call",
)
## Test 2: underlying function
await router._aadapter_completion(
model="claude-3-5-sonnet-20240620",
messages=messages,
adapter_id="anthropic",
mock_response="This is a fake call",
)
print("Response: {}".format(response))
assert response is not None
AnthropicResponse.model_validate(response)
assert response.model == "gpt-3.5-turbo"