Mirror of https://github.com/BerriAI/litellm.git
test(router_code_coverage.py): check if all router functions are directly tested (#6186)
* test(router_code_coverage.py): check if all router functions are directly tested, to prevent regressions (sketched below)
* docs(configs.md): document all environment variables (#6185)
* docs: make it easier to find anthropic/openai prompt caching doc
* added codecov yml (#6207)
* fix codecov.yaml
* run ci/cd again
* (refactor) caching: use LLMCachingHandler for async_get_cache and set_cache (#6208)
* use folder for caching
* fix importing caching
* fix clickhouse pyright
* fix linting
* fix correctly pass kwargs and args
* fix test case for embedding
* fix linting
* fix embedding caching logic
* fix refactor handle utils.py
* fix test_embedding_caching_azure_individual_items_reordered
* (feat) prometheus: well-defined latency buckets (#6211) (see the bucket sketch after the commit metadata)
* fix prometheus to have well-defined latency buckets
* use a well-defined latency bucket
* use types file for prometheus logging
* add test for LATENCY_BUCKETS
* fix prom testing
* fix config.yml
* (refactor caching) use LLMCachingHandler for caching streaming responses (#6210)
* use folder for caching
* fix importing caching
* fix clickhouse pyright
* fix linting
* fix correctly pass kwargs and args
* fix test case for embedding
* fix linting
* fix embedding caching logic
* fix refactor handle utils.py
* refactor async set stream cache
* fix linting
* bump (#6187)
* update code cov yaml
* fix config.yml
* add caching component to code cov
* fix config.yml ci/cd
* add coverage for proxy auth
* (refactor caching) use common `_retrieve_from_cache` helper (#6212)
* use folder for caching
* fix importing caching
* fix clickhouse pyright
* fix linting
* fix correctly pass kwargs and args
* fix test case for embedding
* fix linting
* fix embedding caching logic
* fix refactor handle utils.py
* refactor async set stream cache
* fix linting
* refactor: use _retrieve_from_cache
* refactor: use _convert_cached_result_to_model_response
* fix linting errors
* bump: version 1.49.2 → 1.49.3
* fix code cov components
* test(test_router_helpers.py): add router component unit tests
* test: add additional router tests
* test: add more router testing
* test: add more router testing + more mock functions
* ci(router_code_coverage.py): fix check

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
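A minimal sketch of the headline coverage check, assuming it works by enumerating Router's public functions with inspect and flagging any name that never shows up in the test sources. The helper names below are hypothetical, not the actual contents of tests/router_code_coverage.py:

import inspect

from litellm import Router


def get_public_router_functions() -> list:
    # Public callables defined on Router; async defs count as functions too.
    return [
        name
        for name, _member in inspect.getmembers(Router, predicate=inspect.isfunction)
        if not name.startswith("_")
    ]


def untested_router_functions(test_source: str) -> list:
    # Hypothetical helper: names of Router functions that never appear in the
    # concatenated test sources passed in as one string.
    return [fn for fn in get_public_router_functions() if fn not in test_source]

A CI script can then fail the build whenever untested_router_functions(...) returns a non-empty list.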
This commit is contained in:
parent bcd1a52834
commit 8705f1af92
15 changed files with 768 additions and 164 deletions
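The prometheus bullets above mention well-defined LATENCY_BUCKETS kept in a types file. A minimal sketch of the idea, assuming the standard prometheus_client Histogram; the bucket edges and metric name here are illustrative, not the values actually defined in litellm:

from prometheus_client import Histogram

# Illustrative bucket edges (seconds); the real LATENCY_BUCKETS live in
# litellm's prometheus types file and may differ.
LATENCY_BUCKETS = (0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, float("inf"))

llm_request_latency = Histogram(
    "llm_request_latency_seconds",  # hypothetical metric name
    "End-to-end LLM request latency",
    buckets=LATENCY_BUCKETS,
)

# Usage: record one request's latency in seconds.
llm_request_latency.observe(0.42)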
tests/router_unit_tests/test_router_endpoints.py (new file, 279 lines)
@@ -0,0 +1,279 @@
import sys
import os
import traceback
from dotenv import load_dotenv
from fastapi import Request
from datetime import datetime

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from litellm import Router, CustomLogger

# Get the current directory of the file being run
pwd = os.path.dirname(os.path.realpath(__file__))
print(pwd)

file_path = os.path.join(pwd, "gettysburg.wav")

# Opened at module scope so the tests below can reuse the same file handle
audio_file = open(file_path, "rb")

from pathlib import Path
import litellm
import pytest
import asyncio

@pytest.fixture
def model_list():
    return [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
        {
            "model_name": "gpt-4o",
            "litellm_params": {
                "model": "gpt-4o",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
        {
            "model_name": "dall-e-3",
            "litellm_params": {
                "model": "dall-e-3",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
        {
            "model_name": "cohere-rerank",
            "litellm_params": {
                "model": "cohere/rerank-english-v3.0",
                "api_key": os.getenv("COHERE_API_KEY"),
            },
        },
        {
            # Alias routed to gpt-3.5-turbo with a mocked response, so the
            # anthropic adapter test below runs without an Anthropic key.
            "model_name": "claude-3-5-sonnet-20240620",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "mock_response": "hi this is macintosh.",
            },
        },
    ]

# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):
    def __init__(self):
        self.openai_client = None

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        try:
            # init logging config
            print("logging a transcript kwargs: ", kwargs)
            print("openai client=", kwargs.get("client"))
            self.openai_client = kwargs.get("client")
        except Exception:
            pass


proxy_handler_instance = MyCustomHandler()


# Set litellm.callbacks = [proxy_handler_instance] on the proxy
@pytest.mark.asyncio
async def test_transcription_on_router():
    litellm.set_verbose = True
    litellm.callbacks = [proxy_handler_instance]
    print("\n Testing async transcription on router\n")
    try:
        model_list = [
            {
                "model_name": "whisper",
                "litellm_params": {
                    "model": "whisper-1",
                },
            },
            {
                "model_name": "whisper",
                "litellm_params": {
                    "model": "azure/azure-whisper",
                    "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com/",
                    "api_key": os.getenv("AZURE_EUROPE_API_KEY"),
                    "api_version": "2024-02-15-preview",
                },
            },
        ]

        router = Router(model_list=model_list)

        # Collect the async clients initialized at the router level, so the
        # assertion below can check the callback saw one of these clients.
        router_level_clients = []
        for deployment in router.model_list:
            _deployment_openai_client = router._get_client(
                deployment=deployment,
                kwargs={"model": "whisper-1"},
                client_type="async",
            )

            router_level_clients.append(str(_deployment_openai_client))

        ## test 1: user facing function
        response = await router.atranscription(
            model="whisper",
            file=audio_file,
        )

        ## test 2: underlying function
        response = await router._atranscription(
            model="whisper",
            file=audio_file,
        )
        print(response)

        # PROD Test
        # Ensure we ONLY use the OpenAI/Azure client initialized on the router level
        await asyncio.sleep(5)
        print("OpenAI Client used= ", proxy_handler_instance.openai_client)
        print("all router level clients= ", router_level_clients)
        assert proxy_handler_instance.openai_client in router_level_clients
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")

@pytest.mark.parametrize("mode", ["iterator"])  # "file",
@pytest.mark.asyncio
async def test_audio_speech_router(mode):
    from litellm import Router

    client = Router(
        model_list=[
            {
                "model_name": "tts",
                "litellm_params": {
                    "model": "openai/tts-1",
                },
            },
        ]
    )

    response = await client.aspeech(
        model="tts",
        voice="alloy",
        input="the quick brown fox jumped over the lazy dogs",
        api_base=None,
        api_key=None,
        organization=None,
        project=None,
        max_retries=1,
        timeout=600,
        client=None,
        optional_params={},
    )

    from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent

    assert isinstance(response, HttpxBinaryResponseContent)

@pytest.mark.asyncio()
async def test_rerank_endpoint(model_list):
    from litellm.types.utils import RerankResponse

    router = Router(model_list=model_list)

    ## Test 1: user facing function
    response = await router.arerank(
        model="cohere-rerank",
        query="hello",
        documents=["hello", "world"],
        top_n=3,
    )

    ## Test 2: underlying function
    response = await router._arerank(
        model="cohere-rerank",
        query="hello",
        documents=["hello", "world"],
        top_n=3,
    )

    print("async rerank response: ", response)

    assert response.id is not None
    assert response.results is not None

    RerankResponse.model_validate(response)

@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_text_completion_endpoint(model_list, sync_mode):
    router = Router(model_list=model_list)

    if sync_mode:
        response = router.text_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, how are you?",
            mock_response="I'm fine, thank you!",
        )
    else:
        ## Test 1: user facing function
        response = await router.atext_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, how are you?",
            mock_response="I'm fine, thank you!",
        )

        ## Test 2: underlying function
        response_2 = await router._atext_completion(
            model="gpt-3.5-turbo",
            prompt="Hello, how are you?",
            mock_response="I'm fine, thank you!",
        )
        assert response_2.choices[0].text == "I'm fine, thank you!"

    assert response.choices[0].text == "I'm fine, thank you!"

@pytest.mark.asyncio
async def test_anthropic_router_completion_e2e(model_list):
    from litellm.adapters.anthropic_adapter import anthropic_adapter
    from litellm.types.llms.anthropic import AnthropicResponse

    litellm.set_verbose = True

    litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

    router = Router(model_list=model_list)
    messages = [{"role": "user", "content": "Hey, how's it going?"}]

    ## Test 1: user facing function
    response = await router.aadapter_completion(
        model="claude-3-5-sonnet-20240620",
        messages=messages,
        adapter_id="anthropic",
        mock_response="This is a fake call",
    )

    ## Test 2: underlying function
    await router._aadapter_completion(
        model="claude-3-5-sonnet-20240620",
        messages=messages,
        adapter_id="anthropic",
        mock_response="This is a fake call",
    )

    print("Response: {}".format(response))

    assert response is not None

    AnthropicResponse.model_validate(response)

    # The fixture maps the claude alias to a mocked gpt-3.5-turbo deployment,
    # so the response reports the underlying model.
    assert response.model == "gpt-3.5-turbo"