mirror of https://github.com/BerriAI/litellm.git
246 lines · 9.5 KiB · Python
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

import litellm
from litellm._logging import verbose_router_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.main import verbose_logger

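# Router is imported for type checking only; at runtime LitellmRouter resolves
# to Any (presumably to avoid a circular import with litellm.router).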
if TYPE_CHECKING:
    from litellm.router import Router as _Router

    LitellmRouter = _Router
else:
    LitellmRouter = Any


async def run_async_fallback(
    *args: Tuple[Any],
    litellm_router: LitellmRouter,
    fallback_model_group: List[str],
    original_model_group: str,
    original_exception: Exception,
    max_fallbacks: int,
    fallback_depth: int,
    **kwargs,
) -> Any:
    """
    Loops through all the fallback model groups and calls kwargs["original_function"] with the arguments and keyword arguments provided.

    If the call is successful, it logs the success and returns the response.
    If the call fails, it logs the failure and continues to the next fallback model group.
    If all fallback model groups fail, it raises the most recent exception.

    Args:
        litellm_router: The litellm router instance.
        *args: Positional arguments.
        fallback_model_group: List[str] of fallback model groups. example: ["gpt-4", "gpt-3.5-turbo"]
        original_model_group: The original model group. example: "gpt-3.5-turbo"
        original_exception: The original exception.
        max_fallbacks: The maximum fallback depth allowed before re-raising.
        fallback_depth: The current fallback depth; incremented on each recursive call.
        **kwargs: Keyword arguments.

    Returns:
        The response from the successful fallback model group.
    Raises:
        The most recent exception if all fallback model groups fail.
    """

    ### BASE CASE ### MAX FALLBACK DEPTH REACHED
    if fallback_depth >= max_fallbacks:
        raise original_exception

    error_from_fallbacks = original_exception
    for mg in fallback_model_group:
        if mg == original_model_group:
            continue
        try:
            # LOGGING
            kwargs = litellm_router.log_retry(kwargs=kwargs, e=original_exception)
            verbose_router_logger.info(f"Falling back to model_group = {mg}")
            kwargs["model"] = mg
            kwargs.setdefault("metadata", {}).update(
                {"model_group": mg}
            )  # update model_group used, if fallbacks are done
            kwargs["fallback_depth"] = fallback_depth + 1
            kwargs["max_fallbacks"] = max_fallbacks
            response = await litellm_router.async_function_with_fallbacks(
                *args, **kwargs
            )
            verbose_router_logger.info("Successful fallback b/w models.")
            # callback for successful_fallback_event():
            await log_success_fallback_event(
                original_model_group=original_model_group,
                kwargs=kwargs,
                original_exception=original_exception,
            )
            return response
        except Exception as e:
            error_from_fallbacks = e
            await log_failure_fallback_event(
                original_model_group=original_model_group,
                kwargs=kwargs,
                original_exception=original_exception,
            )
    raise error_from_fallbacks

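# --- Illustrative usage: a hedged sketch, not part of the original module. ---
# Shows how a caller is presumably expected to invoke run_async_fallback.
# `err`, `original_function`, and `messages` below are assumptions about the
# call site, not taken from this file.
async def _example_run_async_fallback(
    litellm_router: LitellmRouter, err: Exception
) -> Any:
    return await run_async_fallback(
        litellm_router=litellm_router,
        fallback_model_group=["gpt-4", "gpt-3.5-turbo"],
        original_model_group="gpt-3.5-turbo",
        original_exception=err,
        max_fallbacks=5,
        fallback_depth=0,
        # kwargs["original_function"] is the callable retried per fallback group:
        original_function=litellm_router._acompletion,  # assumed attribute
        messages=[{"role": "user", "content": "hi"}],
    )

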
def run_sync_fallback(
    litellm_router: LitellmRouter,
    *args: Tuple[Any],
    fallback_model_group: List[str],
    original_model_group: str,
    original_exception: Exception,
    **kwargs,
) -> Any:
    """
    Synchronous version of run_async_fallback.
    Loops through all the fallback model groups and calls kwargs["original_function"] with the arguments and keyword arguments provided.

    If the call is successful, returns the response.
    If the call fails, continues to the next fallback model group.
    If all fallback model groups fail, it raises the most recent exception.

    Args:
        litellm_router: The litellm router instance.
        *args: Positional arguments.
        fallback_model_group: List[str] of fallback model groups. example: ["gpt-4", "gpt-3.5-turbo"]
        original_model_group: The original model group. example: "gpt-3.5-turbo"
        original_exception: The original exception.
        **kwargs: Keyword arguments.

    Returns:
        The response from the successful fallback model group.
    Raises:
        The most recent exception if all fallback model groups fail.
    """
    error_from_fallbacks = original_exception
    for mg in fallback_model_group:
        if mg == original_model_group:
            continue
        try:
            # LOGGING
            kwargs = litellm_router.log_retry(kwargs=kwargs, e=original_exception)
            verbose_router_logger.info(f"Falling back to model_group = {mg}")
            kwargs["model"] = mg
            kwargs.setdefault("metadata", {}).update(
                {"model_group": mg}
            )  # update model_group used, if fallbacks are done
            response = litellm_router.function_with_fallbacks(*args, **kwargs)
            verbose_router_logger.info("Successful fallback b/w models.")
            return response
        except Exception as e:
            error_from_fallbacks = e
    raise error_from_fallbacks

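# --- Illustrative usage: a hedged sketch, not part of the original module. ---
# Unlike run_async_fallback, run_sync_fallback takes the router as its first
# positional argument. `original_function` and `messages` are assumptions.
def _example_run_sync_fallback(
    litellm_router: LitellmRouter, err: Exception
) -> Any:
    return run_sync_fallback(
        litellm_router,
        fallback_model_group=["gpt-4"],
        original_model_group="gpt-3.5-turbo",
        original_exception=err,
        original_function=litellm_router._completion,  # assumed attribute
        messages=[{"role": "user", "content": "hi"}],
    )

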
async def log_success_fallback_event(
    original_model_group: str, kwargs: dict, original_exception: Exception
):
    """
    Log a successful fallback event to all registered callbacks.

    This function iterates through all registered callbacks, instantiating known
    custom-logger-compatible callbacks where needed, and calls the
    log_success_fallback_event method on each CustomLogger instance.

    Args:
        original_model_group (str): The original model group before fallback.
        kwargs (dict): kwargs for the request
        original_exception (Exception): The exception that triggered the fallback.

    Note:
        Errors during logging are caught and reported but do not interrupt the process.
    """
    from litellm.litellm_core_utils.litellm_logging import (
        _init_custom_logger_compatible_class,
    )

    for _callback in litellm.callbacks:
        # Callbacks are either CustomLogger instances or string names of known
        # custom-logger-compatible integrations, which must be instantiated first.
        if isinstance(_callback, CustomLogger) or (
            _callback in litellm._known_custom_logger_compatible_callbacks
        ):
            try:
                _callback_custom_logger: Optional[CustomLogger] = None
                if _callback in litellm._known_custom_logger_compatible_callbacks:
                    _callback_custom_logger = _init_custom_logger_compatible_class(
                        logging_integration=_callback,  # type: ignore
                        llm_router=None,
                        internal_usage_cache=None,
                    )
                elif isinstance(_callback, CustomLogger):
                    _callback_custom_logger = _callback
                else:
                    verbose_router_logger.exception(
                        f"{_callback} logger not found / initialized properly"
                    )
                    continue

                if _callback_custom_logger is None:
                    verbose_router_logger.exception(
                        f"{_callback} logger not found / initialized properly, callback is None"
                    )
                    continue

                await _callback_custom_logger.log_success_fallback_event(
                    original_model_group=original_model_group,
                    kwargs=kwargs,
                    original_exception=original_exception,
                )
            except Exception as e:
                verbose_router_logger.error(
                    f"Error in log_success_fallback_event: {str(e)}"
                )


async def log_failure_fallback_event(
    original_model_group: str, kwargs: dict, original_exception: Exception
):
    """
    Log a failed fallback event to all registered callbacks.

    This function iterates through all registered callbacks, instantiating known
    custom-logger-compatible callbacks where needed, and calls the
    log_failure_fallback_event method on each CustomLogger instance.

    Args:
        original_model_group (str): The original model group before fallback.
        kwargs (dict): kwargs for the request
        original_exception (Exception): The exception that triggered the fallback.

    Note:
        Errors during logging are caught and reported but do not interrupt the process.
    """
    from litellm.litellm_core_utils.litellm_logging import (
        _init_custom_logger_compatible_class,
    )

    for _callback in litellm.callbacks:
        if isinstance(_callback, CustomLogger) or (
            _callback in litellm._known_custom_logger_compatible_callbacks
        ):
            try:
                _callback_custom_logger: Optional[CustomLogger] = None
                if _callback in litellm._known_custom_logger_compatible_callbacks:
                    _callback_custom_logger = _init_custom_logger_compatible_class(
                        logging_integration=_callback,  # type: ignore
                        llm_router=None,
                        internal_usage_cache=None,
                    )
                elif isinstance(_callback, CustomLogger):
                    _callback_custom_logger = _callback
                else:
                    verbose_router_logger.exception(
                        f"{_callback} logger not found / initialized properly"
                    )
                    continue

                if _callback_custom_logger is None:
                    verbose_router_logger.exception(
                        f"{_callback} logger not found / initialized properly"
                    )
                    continue

                await _callback_custom_logger.log_failure_fallback_event(
                    original_model_group=original_model_group,
                    kwargs=kwargs,
                    original_exception=original_exception,
                )
            except Exception as e:
                verbose_router_logger.error(
                    f"Error in log_failure_fallback_event: {str(e)}"
                )

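# --- Illustrative callback: a hedged sketch, not part of the original module. ---
# A CustomLogger subclass receiving the fallback events emitted above; the hook
# signatures mirror the keyword arguments this module passes. Registration via
# litellm.callbacks is assumed to follow the usual litellm callback pattern.
class _ExampleFallbackAuditLogger(CustomLogger):
    async def log_success_fallback_event(
        self, original_model_group: str, kwargs: dict, original_exception: Exception
    ):
        verbose_router_logger.info(
            f"fallback away from {original_model_group} succeeded"
        )

    async def log_failure_fallback_event(
        self, original_model_group: str, kwargs: dict, original_exception: Exception
    ):
        verbose_router_logger.info(
            f"fallback away from {original_model_group} failed: {original_exception}"
        )


# litellm.callbacks.append(_ExampleFallbackAuditLogger())  # assumed registration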