mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
fix: Minor LiteLLM Fixes + Improvements (29/08/2024) (#5436)
* fix(model_checks.py): support returning wildcard models on `/v1/models` Fixes https://github.com/BerriAI/litellm/issues/4903 * fix(bedrock_httpx.py): support calling bedrock via api_base Closes https://github.com/BerriAI/litellm/pull/4587 * fix(litellm_logging.py): only leave last 4 char of gemini key unmasked Fixes https://github.com/BerriAI/litellm/issues/5433 * feat(router.py): support setting 'weight' param for models on router Closes https://github.com/BerriAI/litellm/issues/5410 * test(test_bedrock_completion.py): add unit test for custom api base * fix(model_checks.py): handle no "/" in model
This commit is contained in:
parent
f70b7575d2
commit
dd7b008161
12 changed files with 219 additions and 25 deletions
|
@ -4700,6 +4700,31 @@ class Router:
|
|||
)
|
||||
elif self.routing_strategy == "simple-shuffle":
|
||||
# if users pass weight, rpm or tpm, we do a random weighted pick - 'weight' is checked first, then rpm/tpm
|
||||
|
||||
############## Check if 'weight' param set for a weighted pick #################
|
||||
weight = (
|
||||
healthy_deployments[0].get("litellm_params").get("weight", None)
|
||||
)
|
||||
if weight is not None:
|
||||
# use weighted-random pick if 'weight' is provided
|
||||
weights = [
|
||||
m["litellm_params"].get("weight", 0)
|
||||
for m in healthy_deployments
|
||||
]
|
||||
verbose_router_logger.debug(f"\nweight {weights}")
|
||||
total_weight = sum(weights)
|
||||
weights = [weight / total_weight for weight in weights]
|
||||
verbose_router_logger.debug(f"\n weights {weights}")
|
||||
# Perform weighted random pick
|
||||
selected_index = random.choices(
|
||||
range(len(weights)), weights=weights
|
||||
)[0]
|
||||
verbose_router_logger.debug(f"\n selected index, {selected_index}")
|
||||
deployment = healthy_deployments[selected_index]
|
||||
verbose_router_logger.info(
|
||||
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
|
||||
)
|
||||
return deployment or deployment[0]
|
||||
############## Check if we can do a RPM/TPM based weighted pick #################
|
||||
rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
|
||||
if rpm is not None:
|
||||
|
@ -4847,6 +4872,25 @@ class Router:
|
|||
)
|
||||
elif self.routing_strategy == "simple-shuffle":
|
||||
# if users pass weight, rpm or tpm, we do a random weighted pick - 'weight' is checked first, then rpm/tpm
|
||||
############## Check 'weight' param set for weighted pick #################
|
||||
weight = healthy_deployments[0].get("litellm_params").get("weight", None)
|
||||
if weight is not None:
|
||||
# use weighted-random pick if 'weight' is provided
|
||||
weights = [
|
||||
m["litellm_params"].get("weight", 0) for m in healthy_deployments
|
||||
]
|
||||
verbose_router_logger.debug(f"\nweight {weights}")
|
||||
total_weight = sum(weights)
|
||||
weights = [weight / total_weight for weight in weights]
|
||||
verbose_router_logger.debug(f"\n weights {weights}")
|
||||
# Perform weighted random pick
|
||||
selected_index = random.choices(range(len(weights)), weights=weights)[0]
|
||||
verbose_router_logger.debug(f"\n selected index, {selected_index}")
|
||||
deployment = healthy_deployments[selected_index]
|
||||
verbose_router_logger.info(
|
||||
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
|
||||
)
|
||||
return deployment or deployment[0]
|
||||
############## Check if we can do a RPM/TPM based weighted pick #################
|
||||
rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
|
||||
if rpm is not None:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue