(Perf / latency improvement) Improve pass-through endpoint latency to ~50ms (was 400ms before this PR) (#6874)

* use correct location for types

* fix types location

* perf improvement for pass through endpoints

* update lint check

* fix import

* fix ensure async clients test

* fix azure.py health check

* fix ollama
This commit is contained in:
Ishaan Jaff 2024-11-22 18:47:26 -08:00 committed by GitHub
parent 772b2f9cd2
commit d81ae45827
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 64 additions and 19 deletions

View file

@ -5,9 +5,19 @@ ALLOWED_FILES = [
# local files
"../../litellm/__init__.py",
"../../litellm/llms/custom_httpx/http_handler.py",
"../../litellm/router_utils/client_initalization_utils.py",
"../../litellm/llms/custom_httpx/http_handler.py",
"../../litellm/llms/huggingface_restapi.py",
"../../litellm/llms/base.py",
"../../litellm/llms/custom_httpx/httpx_handler.py",
# when running on ci/cd
"./litellm/__init__.py",
"./litellm/llms/custom_httpx/http_handler.py",
"./litellm/router_utils/client_initalization_utils.py",
"./litellm/llms/custom_httpx/http_handler.py",
"./litellm/llms/huggingface_restapi.py",
"./litellm/llms/base.py",
"./litellm/llms/custom_httpx/httpx_handler.py",
]
warning_msg = "this is a serious violation that can impact latency. Creating Async clients per request can add +500ms per request"
@ -43,6 +53,19 @@ def check_for_async_http_handler(file_path):
raise ValueError(
f"found violation in file {file_path} line: {node.lineno}. Please use `get_async_httpx_client` instead. {warning_msg}"
)
# Check for attribute calls like httpx.AsyncClient()
elif isinstance(node.func, ast.Attribute):
full_name = ""
current = node.func
while isinstance(current, ast.Attribute):
full_name = "." + current.attr + full_name
current = current.value
if isinstance(current, ast.Name):
full_name = current.id + full_name
if full_name.lower() in [name.lower() for name in target_names]:
raise ValueError(
f"found violation in file {file_path} line: {node.lineno}. Please use `get_async_httpx_client` instead. {warning_msg}"
)
return violations