mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
(Perf / latency improvement) improve pass through endpoint latency to ~50ms (before PR was 400ms) (#6874)
* use correct location for types
* fix types location
* perf improvement for pass through endpoints
* update lint check
* fix import
* fix ensure async clients test
* fix azure.py health check
* fix ollama
This commit is contained in:
parent
772b2f9cd2
commit
d81ae45827
9 changed files with 64 additions and 19 deletions
|
@ -5,9 +5,19 @@ ALLOWED_FILES = [
|
|||
# local files
|
||||
"../../litellm/__init__.py",
|
||||
"../../litellm/llms/custom_httpx/http_handler.py",
|
||||
"../../litellm/router_utils/client_initalization_utils.py",
|
||||
"../../litellm/llms/custom_httpx/http_handler.py",
|
||||
"../../litellm/llms/huggingface_restapi.py",
|
||||
"../../litellm/llms/base.py",
|
||||
"../../litellm/llms/custom_httpx/httpx_handler.py",
|
||||
# when running on ci/cd
|
||||
"./litellm/__init__.py",
|
||||
"./litellm/llms/custom_httpx/http_handler.py",
|
||||
"./litellm/router_utils/client_initalization_utils.py",
|
||||
"./litellm/llms/custom_httpx/http_handler.py",
|
||||
"./litellm/llms/huggingface_restapi.py",
|
||||
"./litellm/llms/base.py",
|
||||
"./litellm/llms/custom_httpx/httpx_handler.py",
|
||||
]
|
||||
|
||||
warning_msg = "this is a serious violation that can impact latency. Creating Async clients per request can add +500ms per request"
|
||||
|
@ -43,6 +53,19 @@ def check_for_async_http_handler(file_path):
|
|||
raise ValueError(
|
||||
f"found violation in file {file_path} line: {node.lineno}. Please use `get_async_httpx_client` instead. {warning_msg}"
|
||||
)
|
||||
# Check for attribute calls like httpx.AsyncClient()
|
||||
elif isinstance(node.func, ast.Attribute):
|
||||
full_name = ""
|
||||
current = node.func
|
||||
while isinstance(current, ast.Attribute):
|
||||
full_name = "." + current.attr + full_name
|
||||
current = current.value
|
||||
if isinstance(current, ast.Name):
|
||||
full_name = current.id + full_name
|
||||
if full_name.lower() in [name.lower() for name in target_names]:
|
||||
raise ValueError(
|
||||
f"found violation in file {file_path} line: {node.lineno}. Please use `get_async_httpx_client` instead. {warning_msg}"
|
||||
)
|
||||
return violations
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue