import sys
import os
import traceback
from dotenv import load_dotenv
from fastapi import Request
from datetime import datetime

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from litellm import Router, CustomLogger

# Get the current directory of the file being run
pwd = os.path.dirname(os.path.realpath(__file__))
print(pwd)

file_path = os.path.join(pwd, "gettysburg.wav")

audio_file = open(file_path, "rb")
from pathlib import Path
import litellm
import pytest
import asyncio


@pytest.fixture
def model_list():
    return [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
        {
            "model_name": "gpt-4o",
            "litellm_params": {
                "model": "gpt-4o",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
        {
            "model_name": "dall-e-3",
            "litellm_params": {
                "model": "dall-e-3",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
        {
            "model_name": "cohere-rerank",
            "litellm_params": {
                "model": "cohere/rerank-english-v3.0",
                "api_key": os.getenv("COHERE_API_KEY"),
            },
        },
        {
            "model_name": "claude-3-5-sonnet-20240620",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "mock_response": "hi this is macintosh.",
            },
        },
    ]


# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):
    def __init__(self):
        self.openai_client = None

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        try:
            # init logging config
            print("logging a transcript kwargs: ", kwargs)
            print("openai client=", kwargs.get("client"))
            self.openai_client = kwargs.get("client")

        except Exception:
            pass


proxy_handler_instance = MyCustomHandler()

# Set litellm.callbacks = [proxy_handler_instance] on the proxy
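# A minimal sketch of how a handler like this is typically registered with the proxy via
# proxy_config.yaml (assumes this module is saved as custom_callbacks.py next to the config;
# shown for illustration only, not exercised by these tests):
#
#   litellm_settings:
#     callbacks: custom_callbacks.proxy_handler_instance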
@pytest.mark.asyncio
@pytest.mark.flaky(retries=6, delay=10)
async def test_transcription_on_router():
    litellm.set_verbose = True
    litellm.callbacks = [proxy_handler_instance]
    print("\n Testing async transcription on router\n")
    try:
        model_list = [
            {
                "model_name": "whisper",
                "litellm_params": {
                    "model": "whisper-1",
                },
            },
            {
                "model_name": "whisper",
                "litellm_params": {
                    "model": "azure/azure-whisper",
                    "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com/",
                    "api_key": os.getenv("AZURE_EUROPE_API_KEY"),
                    "api_version": "2024-02-15-preview",
                },
            },
        ]

        router = Router(model_list=model_list)

        router_level_clients = []
        for deployment in router.model_list:
            _deployment_openai_client = router._get_client(
                deployment=deployment,
                kwargs={"model": "whisper-1"},
                client_type="async",
            )

            router_level_clients.append(str(_deployment_openai_client))

        ## test 1: user facing function
        response = await router.atranscription(
            model="whisper",
            file=audio_file,
        )

        ## test 2: underlying function
        response = await router._atranscription(
            model="whisper",
            file=audio_file,
        )
        print(response)

        # PROD Test
        # Ensure we ONLY use OpenAI/Azure client initialized on the router level
        await asyncio.sleep(5)
        print("OpenAI Client used= ", proxy_handler_instance.openai_client)
        print("all router level clients= ", router_level_clients)
        assert proxy_handler_instance.openai_client in router_level_clients
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.parametrize("mode", ["iterator"])  # "file",
@pytest.mark.asyncio
async def test_audio_speech_router(mode):

    from litellm import Router

    client = Router(
        model_list=[
            {
                "model_name": "tts",
                "litellm_params": {
                    "model": "openai/tts-1",
                },
            },
        ]
    )

    response = await client.aspeech(
        model="tts",
        voice="alloy",
        input="the quick brown fox jumped over the lazy dogs",
        api_base=None,
        api_key=None,
        organization=None,
        project=None,
        max_retries=1,
        timeout=600,
        client=None,
        optional_params={},
    )

    from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent

    assert isinstance(response, HttpxBinaryResponseContent)


@pytest.mark.asyncio()
async def test_rerank_endpoint(model_list):
    from litellm.types.utils import RerankResponse

    router = Router(model_list=model_list)

    ## Test 1: user facing function
    response = await router.arerank(
        model="cohere-rerank",
        query="hello",
        documents=["hello", "world"],
        top_n=3,
    )

    ## Test 2: underlying function
    response = await router._arerank(
        model="cohere-rerank",
        query="hello",
        documents=["hello", "world"],
        top_n=3,
    )

    print("async re rank response: ", response)

    assert response.id is not None
    assert response.results is not None

    RerankResponse.model_validate(response)


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_aaaaatext_completion_endpoint(model_list, sync_mode):
    router = Router(model_list=model_list)

    if sync_mode:
        response = router.text_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, how are you?",
            mock_response="I'm fine, thank you!",
        )
    else:
        ## Test 1: user facing function
        response = await router.atext_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, how are you?",
            mock_response="I'm fine, thank you!",
        )

        ## Test 2: underlying function
        response_2 = await router._atext_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, how are you?",
            mock_response="I'm fine, thank you!",
        )
        assert response_2.choices[0].text == "I'm fine, thank you!"

    assert response.choices[0].text == "I'm fine, thank you!"


@pytest.mark.asyncio
async def test_anthropic_router_completion_e2e(model_list):
    from litellm.adapters.anthropic_adapter import anthropic_adapter
    from litellm.types.llms.anthropic import AnthropicResponse

    litellm.set_verbose = True

    litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

    router = Router(model_list=model_list)
    messages = [{"role": "user", "content": "Hey, how's it going?"}]

    ## Test 1: user facing function
    response = await router.aadapter_completion(
        model="claude-3-5-sonnet-20240620",
        messages=messages,
        adapter_id="anthropic",
        mock_response="This is a fake call",
    )

    ## Test 2: underlying function
    await router._aadapter_completion(
        model="claude-3-5-sonnet-20240620",
        messages=messages,
        adapter_id="anthropic",
        mock_response="This is a fake call",
    )

    print("Response: {}".format(response))

    assert response is not None

    AnthropicResponse.model_validate(response)

    assert response.model == "gpt-3.5-turbo"