mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
(feat) print debug info per deployment
This commit is contained in:
parent
1f15c79252
commit
6f0faca85b
3 changed files with 51 additions and 59 deletions
|
@ -240,6 +240,21 @@ class Router:
|
|||
{"caching_groups": caching_groups}
|
||||
)
|
||||
|
||||
self.deployment_stats: dict = {} # used for debugging load balancing
|
||||
"""
|
||||
deployment_stats = {
|
||||
"122999-2828282-277:
|
||||
{
|
||||
"model": "gpt-3",
|
||||
"api_base": "http://localhost:8000",
|
||||
"num_requests": 20,
|
||||
"avg_latency": 0.001,
|
||||
"num_failures": 0,
|
||||
"num_successes": 20
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
### ROUTING SETUP ###
|
||||
if routing_strategy == "least-busy":
|
||||
self.leastbusy_logger = LeastBusyLoggingHandler(
|
||||
|
@ -390,6 +405,10 @@ class Router:
|
|||
messages=messages,
|
||||
specific_deployment=kwargs.pop("specific_deployment", None),
|
||||
)
|
||||
if self.set_verbose == True and self.debug_level == "DEBUG":
|
||||
# debug how often this deployment picked
|
||||
self._print_deployment_metrics(deployment=deployment)
|
||||
|
||||
kwargs.setdefault("metadata", {}).update(
|
||||
{
|
||||
"deployment": deployment["litellm_params"]["model"],
|
||||
|
@ -2124,6 +2143,32 @@ class Router:
|
|||
)
|
||||
return deployment
|
||||
|
||||
def _print_deployment_metrics(self, deployment):
|
||||
litellm_params = deployment["litellm_params"]
|
||||
api_base = litellm_params.get("api_base", "")
|
||||
model = litellm_params.get("model", "")
|
||||
|
||||
model_id = deployment.get("model_info", {}).get("id", None)
|
||||
|
||||
# update self.deployment_stats
|
||||
if model_id is not None:
|
||||
if model_id in self.deployment_stats:
|
||||
# only update num_requests
|
||||
self.deployment_stats[model_id]["num_requests"] += 1
|
||||
else:
|
||||
self.deployment_stats[model_id] = {
|
||||
"api_base": api_base,
|
||||
"model": model,
|
||||
"num_requests": 1,
|
||||
}
|
||||
from pprint import pformat
|
||||
|
||||
# Assuming self.deployment_stats is your dictionary
|
||||
formatted_stats = pformat(self.deployment_stats)
|
||||
|
||||
# Assuming verbose_router_logger is your logger
|
||||
verbose_router_logger.info("self.deployment_stats: \n%s", formatted_stats)
|
||||
|
||||
def flush_cache(self):
|
||||
litellm.cache = None
|
||||
self.cache.flush_cache()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue