mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
fix(route_llm_request.py): move to using common router, even for clie… (#8966)
* fix(route_llm_request.py): move to using common router, even for client-side credentials ensures fallbacks / cooldown logic still works * test(test_route_llm_request.py): add unit test for route request * feat(router.py): generate unique model id when clientside credential passed in Prevents cooldowns for api key 1 from impacting api key 2 * test(test_router.py): update testing to ensure original litellm params not mutated * fix(router.py): upsert clientside call into llm router model list enables cooldown logic to work accurately * fix: fix linting error * test(test_router_utils.py): add direct test for new util on router
This commit is contained in:
parent
bd2231400f
commit
ae6f91a56d
9 changed files with 273 additions and 36 deletions
|
@ -67,6 +67,10 @@ from litellm.router_utils.batch_utils import (
|
|||
replace_model_in_jsonl,
|
||||
)
|
||||
from litellm.router_utils.client_initalization_utils import InitalizeOpenAISDKClient
|
||||
from litellm.router_utils.clientside_credential_handler import (
|
||||
get_dynamic_litellm_params,
|
||||
is_clientside_credential,
|
||||
)
|
||||
from litellm.router_utils.cooldown_cache import CooldownCache
|
||||
from litellm.router_utils.cooldown_handlers import (
|
||||
DEFAULT_COOLDOWN_TIME_SECONDS,
|
||||
|
@ -1067,20 +1071,61 @@ class Router:
|
|||
elif k == "metadata":
|
||||
kwargs[k].update(v)
|
||||
|
||||
def _handle_clientside_credential(
|
||||
self, deployment: dict, kwargs: dict
|
||||
) -> Deployment:
|
||||
"""
|
||||
Handle clientside credential
|
||||
"""
|
||||
model_info = deployment.get("model_info", {}).copy()
|
||||
litellm_params = deployment["litellm_params"].copy()
|
||||
dynamic_litellm_params = get_dynamic_litellm_params(
|
||||
litellm_params=litellm_params, request_kwargs=kwargs
|
||||
)
|
||||
metadata = kwargs.get("metadata", {})
|
||||
model_group = cast(str, metadata.get("model_group"))
|
||||
_model_id = self._generate_model_id(
|
||||
model_group=model_group, litellm_params=dynamic_litellm_params
|
||||
)
|
||||
original_model_id = model_info.get("id")
|
||||
model_info["id"] = _model_id
|
||||
model_info["original_model_id"] = original_model_id
|
||||
deployment_pydantic_obj = Deployment(
|
||||
model_name=model_group,
|
||||
litellm_params=LiteLLM_Params(**dynamic_litellm_params),
|
||||
model_info=model_info,
|
||||
)
|
||||
self.upsert_deployment(
|
||||
deployment=deployment_pydantic_obj
|
||||
) # add new deployment to router
|
||||
return deployment_pydantic_obj
|
||||
|
||||
def _update_kwargs_with_deployment(self, deployment: dict, kwargs: dict) -> None:
|
||||
"""
|
||||
2 jobs:
|
||||
- Adds selected deployment, model_info and api_base to kwargs["metadata"] (used for logging)
|
||||
- Adds default litellm params to kwargs, if set.
|
||||
"""
|
||||
model_info = deployment.get("model_info", {}).copy()
|
||||
deployment_model_name = deployment["litellm_params"]["model"]
|
||||
deployment_api_base = deployment["litellm_params"].get("api_base")
|
||||
if is_clientside_credential(request_kwargs=kwargs):
|
||||
deployment_pydantic_obj = self._handle_clientside_credential(
|
||||
deployment=deployment, kwargs=kwargs
|
||||
)
|
||||
model_info = deployment_pydantic_obj.model_info.model_dump()
|
||||
deployment_model_name = deployment_pydantic_obj.litellm_params.model
|
||||
deployment_api_base = deployment_pydantic_obj.litellm_params.api_base
|
||||
|
||||
kwargs.setdefault("metadata", {}).update(
|
||||
{
|
||||
"deployment": deployment["litellm_params"]["model"],
|
||||
"model_info": deployment.get("model_info", {}),
|
||||
"api_base": deployment.get("litellm_params", {}).get("api_base"),
|
||||
"deployment": deployment_model_name,
|
||||
"model_info": model_info,
|
||||
"api_base": deployment_api_base,
|
||||
}
|
||||
)
|
||||
kwargs["model_info"] = deployment.get("model_info", {})
|
||||
kwargs["model_info"] = model_info
|
||||
|
||||
kwargs["timeout"] = self._get_timeout(
|
||||
kwargs=kwargs, data=deployment["litellm_params"]
|
||||
)
|
||||
|
@ -3601,6 +3646,7 @@ class Router:
|
|||
- True if the deployment should be put in cooldown
|
||||
- False if the deployment should not be put in cooldown
|
||||
"""
|
||||
verbose_router_logger.debug("Router: Entering 'deployment_callback_on_failure'")
|
||||
try:
|
||||
exception = kwargs.get("exception", None)
|
||||
exception_status = getattr(exception, "status_code", "")
|
||||
|
@ -3642,6 +3688,9 @@ class Router:
|
|||
|
||||
return result
|
||||
else:
|
||||
verbose_router_logger.debug(
|
||||
"Router: Exiting 'deployment_callback_on_failure' without cooldown. No model_info found."
|
||||
)
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue