* fix(pattern_match_deployments.py): default to user input if unable to map based on wildcards
* test: fix test
* test: reset test name
* test: update conftest to reload proxy server module between tests
* ci(config.yml): move langfuse out of local_testing to reduce ci/cd time
* ci(config.yml): cleanup langfuse ci/cd tests
* fix: update test to not use global proxy_server app module
* ci: move caching to a separate test pipeline to speed up ci
* test: update conftest to check if proxy_server attr exists before reloading
* build(conftest.py): don't block on inability to reload proxy_server
* ci(config.yml): update caching unit test filter to work on 'cache' keyword as well
* fix(encrypt_decrypt_utils.py): use function to get salt key
* test: mark flaky test
* test: handle anthropic overloaded errors
* refactor: create separate ci/cd pipeline for proxy unit tests to make ci/cd faster
* ci(config.yml): add litellm_proxy_unit_testing to build_and_test jobs
* ci(config.yml): generate prisma binaries for proxy unit tests
* test: readd vertex_key.json
* ci(config.yml): remove `-s` from proxy_unit_test cmd to speed up test
* ci: remove any 'debug' logging flag to speed up ci pipeline
* test: fix test
* test(test_braintrust.py): rerun
* test: add delay for braintrust test
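The first fix above makes wildcard routing fall back to the caller's model name when no deployment pattern matches. A minimal sketch of that fallback behavior (hypothetical helper, not the actual pattern_match_deployments.py code):

    from fnmatch import fnmatch

    def resolve_deployment(requested_model: str, wildcard_map: dict) -> str:
        # return the mapped deployment for the first matching wildcard pattern
        for pattern, deployment in wildcard_map.items():
            if fnmatch(requested_model, pattern):
                return deployment
        # default to the user's input when no wildcard applies
        return requested_model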
280 lines
7.8 KiB
Python
import sys
import os
import traceback
from datetime import datetime
from pathlib import Path

from dotenv import load_dotenv
from fastapi import Request

load_dotenv()  # pull API keys (e.g. OPENAI_API_KEY) from a local .env file

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
import pytest
import asyncio
from litellm import Router, CustomLogger

# Get the current directory of the file being run
pwd = os.path.dirname(os.path.realpath(__file__))
print(pwd)

# Sample audio used by the transcription tests below
file_path = os.path.join(pwd, "gettysburg.wav")

audio_file = open(file_path, "rb")
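
# The sample file must sit next to this test module. A skip-guard sketch
# (hypothetical; the tests currently assume the file exists):
#
#   if not os.path.exists(file_path):
#       pytest.skip("gettysburg.wav sample file not found", allow_module_level=True)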

@pytest.fixture
def model_list():
    return [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
        {
            "model_name": "gpt-4o",
            "litellm_params": {
                "model": "gpt-4o",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
        {
            "model_name": "dall-e-3",
            "litellm_params": {
                "model": "dall-e-3",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
        {
            "model_name": "cohere-rerank",
            "litellm_params": {
                "model": "cohere/rerank-english-v3.0",
                "api_key": os.getenv("COHERE_API_KEY"),
            },
        },
        {
            "model_name": "claude-3-5-sonnet-20240620",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "mock_response": "hi this is macintosh.",
            },
        },
    ]
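
# The last entry aliases "claude-3-5-sonnet-20240620" to a mocked gpt-3.5-turbo
# deployment, so the Anthropic-named route can be exercised without a real
# Anthropic key. Sketch of a direct call against the alias (hypothetical usage,
# not one of the tests below):
#
#   router = Router(model_list=[...])  # the list returned by the fixture
#   response = await router.acompletion(
#       model="claude-3-5-sonnet-20240620",
#       messages=[{"role": "user", "content": "ping"}],
#   )
#   assert response.choices[0].message.content == "hi this is macintosh."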

# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):
    def __init__(self):
        self.openai_client = None

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        try:
            # init logging config
            print("logging a transcript kwargs: ", kwargs)
            print("openai client=", kwargs.get("client"))
            self.openai_client = kwargs.get("client")
        except Exception:
            pass


proxy_handler_instance = MyCustomHandler()


# Set litellm.callbacks = [proxy_handler_instance] on the proxy
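# Sketch of the proxy wiring referenced above, assuming this module is saved
# as custom_callbacks.py somewhere the proxy can import it (see the LiteLLM
# proxy docs on custom callbacks):
#
#   # proxy_config.yaml
#   litellm_settings:
#     callbacks: custom_callbacks.proxy_handler_instance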
@pytest.mark.asyncio
@pytest.mark.flaky(retries=6, delay=10)
async def test_transcription_on_router():
    litellm.set_verbose = True
    litellm.callbacks = [proxy_handler_instance]
    print("\n Testing async transcription on router\n")
    try:
        model_list = [
            {
                "model_name": "whisper",
                "litellm_params": {
                    "model": "whisper-1",
                },
            },
            {
                "model_name": "whisper",
                "litellm_params": {
                    "model": "azure/azure-whisper",
                    "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com/",
                    "api_key": os.getenv("AZURE_EUROPE_API_KEY"),
                    "api_version": "2024-02-15-preview",
                },
            },
        ]

        router = Router(model_list=model_list)

        # Collect the string form of every client the router initialized, so the
        # client seen by the success callback can be matched against them below.
        router_level_clients = []
        for deployment in router.model_list:
            _deployment_openai_client = router._get_client(
                deployment=deployment,
                kwargs={"model": "whisper-1"},
                client_type="async",
            )

            router_level_clients.append(str(_deployment_openai_client))

        ## test 1: user facing function
        response = await router.atranscription(
            model="whisper",
            file=audio_file,
        )

        ## test 2: underlying function
        response = await router._atranscription(
            model="whisper",
            file=audio_file,
        )
        print(response)

        # PROD Test
        # Ensure we ONLY use the OpenAI/Azure client initialized on the router level
        await asyncio.sleep(5)
        print("OpenAI Client used= ", proxy_handler_instance.openai_client)
        print("all router level clients= ", router_level_clients)
        assert proxy_handler_instance.openai_client in router_level_clients
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.parametrize("mode", ["iterator"])  # "file",
@pytest.mark.asyncio
async def test_audio_speech_router(mode):
    from litellm import Router

    client = Router(
        model_list=[
            {
                "model_name": "tts",
                "litellm_params": {
                    "model": "openai/tts-1",
                },
            },
        ]
    )

    response = await client.aspeech(
        model="tts",
        voice="alloy",
        input="the quick brown fox jumped over the lazy dogs",
        api_base=None,
        api_key=None,
        organization=None,
        project=None,
        max_retries=1,
        timeout=600,
        client=None,
        optional_params={},
    )

    from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent

    assert isinstance(response, HttpxBinaryResponseContent)
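
# HttpxBinaryResponseContent wraps the raw audio bytes. A consumption sketch
# (assumes the OpenAI SDK helper methods on this type; not executed here):
#
#   speech_file = Path(pwd) / "speech.mp3"
#   response.write_to_file(speech_file)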


@pytest.mark.asyncio()
async def test_rerank_endpoint(model_list):
    from litellm.types.utils import RerankResponse

    router = Router(model_list=model_list)

    ## Test 1: user facing function
    response = await router.arerank(
        model="cohere-rerank",
        query="hello",
        documents=["hello", "world"],
        top_n=3,
    )

    ## Test 2: underlying function
    response = await router._arerank(
        model="cohere-rerank",
        query="hello",
        documents=["hello", "world"],
        top_n=3,
    )

    print("async re rank response: ", response)

    assert response.id is not None
    assert response.results is not None

    RerankResponse.model_validate(response)
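
# Each entry in response.results is expected to reference a document by index
# along with a relevance score. A follow-up check sketch (assumes the
# Cohere-style result shape; field access is illustrative):
#
#   top = max(response.results, key=lambda r: r["relevance_score"])
#   print("most relevant doc:", documents[top["index"]])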

@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_text_completion_endpoint(model_list, sync_mode):
    router = Router(model_list=model_list)

    if sync_mode:
        response = router.text_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, how are you?",
            mock_response="I'm fine, thank you!",
        )
    else:
        ## Test 1: user facing function
        response = await router.atext_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, how are you?",
            mock_response="I'm fine, thank you!",
        )

        ## Test 2: underlying function
        response_2 = await router._atext_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, how are you?",
            mock_response="I'm fine, thank you!",
        )
        assert response_2.choices[0].text == "I'm fine, thank you!"

    assert response.choices[0].text == "I'm fine, thank you!"

@pytest.mark.asyncio
async def test_anthropic_router_completion_e2e(model_list):
    from litellm.adapters.anthropic_adapter import anthropic_adapter
    from litellm.types.llms.anthropic import AnthropicResponse

    litellm.set_verbose = True

    litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

    router = Router(model_list=model_list)
    messages = [{"role": "user", "content": "Hey, how's it going?"}]

    ## Test 1: user facing function
    response = await router.aadapter_completion(
        model="claude-3-5-sonnet-20240620",
        messages=messages,
        adapter_id="anthropic",
        mock_response="This is a fake call",
    )

    ## Test 2: underlying function
    await router._aadapter_completion(
        model="claude-3-5-sonnet-20240620",
        messages=messages,
        adapter_id="anthropic",
        mock_response="This is a fake call",
    )

    print("Response: {}".format(response))

    assert response is not None

    AnthropicResponse.model_validate(response)

    # the fixture maps this alias onto a mocked gpt-3.5-turbo deployment,
    # so the adapter response reports the underlying model
    assert response.model == "gpt-3.5-turbo"