(fix) add linting check to ban creating AsyncHTTPHandler during LLM calling (#6855)

* fix triton
* fix TEXT_COMPLETION_CODESTRAL
* fix REPLICATE
* fix CLARIFAI
* fix HUGGINGFACE
* add test_no_async_http_handler_usage
* fix PREDIBASE
* fix anthropic use get_async_httpx_client
* fix vertex fine tuning
* fix dbricks get_async_httpx_client
* fix get_async_httpx_client vertex
* fix get_async_httpx_client
* fix get_async_httpx_client
* fix make_async_azure_httpx_request
* fix check_for_async_http_handler
* test: cleanup mistral model
* add check for AsyncClient
* fix check_for_async_http_handler
* fix get_async_httpx_client
* fix tests using in_memory_llm_clients_cache
* fix langfuse import
* fix import

Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
parent 71ebf47cef
commit 920f4c9f82

26 changed files with 288 additions and 62 deletions
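The change the commit message describes is a pattern swap inside litellm's provider code: instead of constructing an AsyncHTTPHandler (or httpx.AsyncClient) on every request, call sites go through get_async_httpx_client, which lives in litellm/llms/custom_httpx/http_handler.py (one of the files the new lint test allow-lists below). A minimal before/after sketch of that pattern, assuming get_async_httpx_client takes an llm_provider identifier plus an optional params dict; the exact signature and the handler's .post API are not shown in this diff and should be checked against http_handler.py:

import litellm
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client


async def call_provider(url: str, payload: dict) -> dict:
    # Flagged by the new test: client = AsyncHTTPHandler(timeout=600)
    # Preferred: reuse a client from litellm's in-memory client cache.
    client = get_async_httpx_client(
        llm_provider=litellm.LlmProviders.ANTHROPIC,  # assumed provider enum value
        params={"timeout": 600},  # assumed optional params dict
    )
    response = await client.post(url=url, json=payload)  # assumed handler API
    return response.json()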
tests/code_coverage_tests/ensure_async_clients_test.py (new file, +88 lines)
@@ -0,0 +1,88 @@
import ast
import os

ALLOWED_FILES = [
    # local files
    "../../litellm/__init__.py",
    "../../litellm/llms/custom_httpx/http_handler.py",
    # when running on ci/cd
    "./litellm/__init__.py",
    "./litellm/llms/custom_httpx/http_handler.py",
]

warning_msg = "this is a serious violation that can impact latency. Creating Async clients per request can add +500ms per request"


def check_for_async_http_handler(file_path):
    """
    Checks if AsyncHttpHandler is instantiated in the given file.
    Returns a list of line numbers where AsyncHttpHandler is used.
    """
    print("..checking file=", file_path)
    if file_path in ALLOWED_FILES:
        return []
    with open(file_path, "r") as file:
        try:
            tree = ast.parse(file.read())
        except SyntaxError:
            print(f"Warning: Syntax error in file {file_path}")
            return []

    violations = []
    target_names = [
        "AsyncHttpHandler",
        "AsyncHTTPHandler",
        "AsyncClient",
        "httpx.AsyncClient",
    ]  # Add variations here
    for node in ast.walk(tree):
        if isinstance(node, ast.Call):
            if isinstance(node.func, ast.Name) and node.func.id.lower() in [
                name.lower() for name in target_names
            ]:
                raise ValueError(
                    f"found violation in file {file_path} line: {node.lineno}. Please use `get_async_httpx_client` instead. {warning_msg}"
                )
    return violations


def scan_directory_for_async_handler(base_dir):
    """
    Scans all Python files in the directory tree for AsyncHttpHandler usage.
    Returns a dict of files and line numbers where violations were found.
    """
    violations = {}

    for root, _, files in os.walk(base_dir):
        for file in files:
            if file.endswith(".py"):
                file_path = os.path.join(root, file)
                file_violations = check_for_async_http_handler(file_path)
                if file_violations:
                    violations[file_path] = file_violations

    return violations


def test_no_async_http_handler_usage():
    """
    Test to ensure AsyncHttpHandler is not used anywhere in the codebase.
    """
    base_dir = "./litellm"  # Adjust this path as needed

    # base_dir = "../../litellm"  # LOCAL TESTING
    violations = scan_directory_for_async_handler(base_dir)

    if violations:
        violation_messages = []
        for file_path, line_numbers in violations.items():
            violation_messages.append(
                f"Found AsyncHttpHandler in {file_path} at lines: {line_numbers}"
            )
        raise AssertionError(
            "AsyncHttpHandler usage detected:\n" + "\n".join(violation_messages)
        )


if __name__ == "__main__":
    test_no_async_http_handler_usage()
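As a quick illustration of what the AST walk above catches (a hypothetical snippet, not part of this commit): any bare-name call whose identifier case-insensitively matches the target list is reported, while attribute calls such as httpx.AsyncClient(...) are not ast.Name nodes and so fall outside this particular check.

import ast

snippet = "client = AsyncHTTPHandler(timeout=600)\n"
tree = ast.parse(snippet)
for node in ast.walk(tree):
    if isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
        if node.func.id.lower() in {"asynchttphandler", "asyncclient"}:
            # Mirrors the ValueError raised by check_for_async_http_handler
            print(f"violation at line {node.lineno}: use get_async_httpx_client instead")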
@@ -8,6 +8,7 @@ import traceback
 
 from dotenv import load_dotenv
 from openai.types.image import Image
+from litellm.caching import InMemoryCache
 
 logging.basicConfig(level=logging.DEBUG)
 load_dotenv()
@@ -107,7 +108,7 @@ class TestVertexImageGeneration(BaseImageGenTest):
         # comment this when running locally
         load_vertex_ai_credentials()
 
-        litellm.in_memory_llm_clients_cache = {}
+        litellm.in_memory_llm_clients_cache = InMemoryCache()
         return {
             "model": "vertex_ai/imagegeneration@006",
             "vertex_ai_project": "adroit-crow-413218",
@@ -118,13 +119,13 @@ class TestVertexImageGeneration(BaseImageGenTest):
 
 class TestBedrockSd3(BaseImageGenTest):
     def get_base_image_generation_call_args(self) -> dict:
-        litellm.in_memory_llm_clients_cache = {}
+        litellm.in_memory_llm_clients_cache = InMemoryCache()
         return {"model": "bedrock/stability.sd3-large-v1:0"}
 
 
 class TestBedrockSd1(BaseImageGenTest):
     def get_base_image_generation_call_args(self) -> dict:
-        litellm.in_memory_llm_clients_cache = {}
+        litellm.in_memory_llm_clients_cache = InMemoryCache()
         return {"model": "bedrock/stability.sd3-large-v1:0"}
 
 
@@ -181,7 +182,7 @@ def test_image_generation_azure_dall_e_3():
 @pytest.mark.asyncio
 async def test_aimage_generation_bedrock_with_optional_params():
     try:
-        litellm.in_memory_llm_clients_cache = {}
+        litellm.in_memory_llm_clients_cache = InMemoryCache()
         response = await litellm.aimage_generation(
             prompt="A cute baby sea otter",
             model="bedrock/stability.stable-diffusion-xl-v1",
@@ -12,6 +12,7 @@ sys.path.insert(0, os.path.abspath("../.."))
 
 import litellm
 from litellm import completion
+from litellm.caching import InMemoryCache
 
 litellm.num_retries = 3
 litellm.success_callback = ["langfuse"]
@@ -29,15 +30,20 @@ def langfuse_client():
         f"{os.environ['LANGFUSE_PUBLIC_KEY']}-{os.environ['LANGFUSE_SECRET_KEY']}"
     )
     # use a in memory langfuse client for testing, RAM util on ci/cd gets too high when we init many langfuse clients
-    if _langfuse_cache_key in litellm.in_memory_llm_clients_cache:
-        langfuse_client = litellm.in_memory_llm_clients_cache[_langfuse_cache_key]
+
+    _cached_client = litellm.in_memory_llm_clients_cache.get_cache(_langfuse_cache_key)
+    if _cached_client:
+        langfuse_client = _cached_client
     else:
         langfuse_client = langfuse.Langfuse(
             public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
             secret_key=os.environ["LANGFUSE_SECRET_KEY"],
             host=None,
         )
-        litellm.in_memory_llm_clients_cache[_langfuse_cache_key] = langfuse_client
+        litellm.in_memory_llm_clients_cache.set_cache(
+            key=_langfuse_cache_key,
+            value=langfuse_client,
+        )
 
     print("NEW LANGFUSE CLIENT")
 
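Taken together with the image-generation test hunks above, the test-side pattern is: reset litellm.in_memory_llm_clients_cache to a fresh InMemoryCache, then go through get_cache/set_cache instead of dict indexing. A small sketch of that usage (the cache key and value here are made up; the hunk above implies get_cache returns a falsy value on a miss):

import litellm
from litellm.caching import InMemoryCache

# Start each test with an empty client cache so no async client leaks across tests.
litellm.in_memory_llm_clients_cache = InMemoryCache()

_key = "example-client-key"  # hypothetical cache key
if not litellm.in_memory_llm_clients_cache.get_cache(_key):
    client = object()  # stand-in for an expensive client, e.g. a Langfuse client
    litellm.in_memory_llm_clients_cache.set_cache(key=_key, value=client)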