Litellm perf improvements 3 (#6573)

* perf: move writing key to cache, to background task

* perf(litellm_pre_call_utils.py): add otel tracing for pre-call utils

adds 200ms on calls with pgdb connected

* fix(litellm_pre_call_utils.py): rename call_type to actual call used

* perf(proxy_server.py): remove db logic from _get_config_from_file

was causing db calls to occur on every llm request, if team_id was set on key

* fix(auth_checks.py): add check for reducing db calls if user/team id does not exist in db

reduces latency/call by ~100ms

* fix(proxy_server.py): minor fix for when existing_settings does not include alerting

* fix(exception_mapping_utils.py): map databricks exception string

* fix(auth_checks.py): fix auth check logic

* test: correctly mark flaky test

* fix(utils.py): handle auth token error for tokenizers.from_pretrained
This commit is contained in:
Krish Dholakia 2024-11-05 03:51:26 +05:30 committed by GitHub
parent 7525b6bbaa
commit 3a6ba0b955
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 137 additions and 86 deletions

View file

@ -1373,9 +1373,6 @@ class ProxyConfig:
) -> dict:
"""
Given a config file path, load the config from the file.
If `store_model_in_db` is True, then read the DB and update the config with the DB values.
Args:
config_file_path (str): path to the config file
Returns:
@ -1401,40 +1398,6 @@ class ProxyConfig:
"litellm_settings": {},
}
## DB
if prisma_client is not None and (
general_settings.get("store_model_in_db", False) is True
or store_model_in_db is True
):
_tasks = []
keys = [
"general_settings",
"router_settings",
"litellm_settings",
"environment_variables",
]
for k in keys:
response = prisma_client.get_generic_data(
key="param_name", value=k, table_name="config"
)
_tasks.append(response)
responses = await asyncio.gather(*_tasks)
for response in responses:
if response is not None:
param_name = getattr(response, "param_name", None)
param_value = getattr(response, "param_value", None)
if param_name is not None and param_value is not None:
# check if param_name is already in the config
if param_name in config:
if isinstance(config[param_name], dict):
config[param_name].update(param_value)
else:
config[param_name] = param_value
else:
# if it's not in the config - then add it
config[param_name] = param_value
return config
async def save_config(self, new_config: dict):
@ -1500,8 +1463,10 @@ class ProxyConfig:
- for a given team id
- return the relevant completion() call params
"""
# load existing config
config = await self.get_config()
## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
litellm_settings = config.get("litellm_settings", {})
all_teams_config = litellm_settings.get("default_team_settings", None)
@ -8824,7 +8789,7 @@ async def update_config(config_info: ConfigYAML): # noqa: PLR0915
if k == "alert_to_webhook_url":
# check if slack is already enabled. if not, enable it
if "alerting" not in _existing_settings:
_existing_settings["alerting"].append("slack")
_existing_settings = {"alerting": ["slack"]}
elif isinstance(_existing_settings["alerting"], list):
if "slack" not in _existing_settings["alerting"]:
_existing_settings["alerting"].append("slack")