test(router_code_coverage.py): check if all router functions are directly tested (#6186)

* test(router_code_coverage.py): check if all router functions are directly tested

prevent regressions by requiring a direct test for every router function

* docs(configs.md): document all environment variables (#6185)

* docs: make it easier to find anthropic/openai prompt caching doc

* added codecov yml (#6207)

* fix codecov.yaml

* run ci/cd again

* (refactor) caching: use LLMCachingHandler for async_get_cache and set_cache (#6208)

* use folder for caching

* fix importing caching

* fix clickhouse pyright

* fix linting

* fix: correctly pass kwargs and args

* fix test case for embedding

* fix linting

* fix embedding caching logic

* fix refactor: handle utils.py

* fix test_embedding_caching_azure_individual_items_reordered

* (feat) prometheus: well-defined latency buckets (#6211)

* fix: give prometheus well-defined latency buckets

* use a well-defined latency bucket

* use types file for prometheus logging

* add test for LATENCY_BUCKETS

* fix prom testing

* fix config.yml

* (refactor caching) use LLMCachingHandler for caching streaming responses  (#6210)

* use folder for caching

* fix importing caching

* fix clickhouse pyright

* fix linting

* fix: correctly pass kwargs and args

* fix test case for embedding

* fix linting

* fix embedding caching logic

* fix refactor: handle utils.py

* refactor async set stream cache

* fix linting

* bump (#6187)

* update code cov yaml

* fix config.yml

* add caching component to code cov

* fix config.yml ci/cd

* add coverage for proxy auth

* (refactor caching) use common `_retrieve_from_cache` helper  (#6212)

* use folder for caching

* fix importing caching

* fix clickhouse pyright

* fix linting

* fix: correctly pass kwargs and args

* fix test case for embedding

* fix linting

* fix embedding caching logic

* fix refactor: handle utils.py

* refactor async set stream cache

* fix linting

* refactor - use _retrieve_from_cache

* refactor use _convert_cached_result_to_model_response

* fix linting errors

* bump: version 1.49.2 → 1.49.3

* fix code cov components

* test(test_router_helpers.py): add router component unit tests

* test: add additional router tests

* test: add more router testing

* test: add more router testing + more mock functions

* ci(router_code_coverage.py): fix check

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
Krish Dholakia, 2024-10-14 22:44:00 -07:00 (committed by GitHub)
parent 39486e2003
commit 1eb435e50a
15 changed files with 768 additions and 164 deletions

@@ -111,6 +111,7 @@ from litellm.types.router import (
     RouterModelGroupAliasItem,
     RouterRateLimitError,
     RouterRateLimitErrorBasic,
+    RoutingStrategy,
     updateDeployment,
     updateLiteLLMParams,
 )
@@ -519,6 +520,9 @@ class Router:
         self._initialize_alerting()
 
     def validate_fallbacks(self, fallback_param: Optional[List]):
+        """
+        Validate the fallbacks parameter.
+        """
         if fallback_param is None:
             return
@ -530,8 +534,13 @@ class Router:
f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
)
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
if routing_strategy == "least-busy":
def routing_strategy_init(
self, routing_strategy: Union[RoutingStrategy, str], routing_strategy_args: dict
):
if (
routing_strategy == RoutingStrategy.LEAST_BUSY.value
or routing_strategy == RoutingStrategy.LEAST_BUSY
):
self.leastbusy_logger = LeastBusyLoggingHandler(
router_cache=self.cache, model_list=self.model_list
)
@@ -542,7 +551,10 @@ class Router:
                 litellm.input_callback = [self.leastbusy_logger]  # type: ignore
             if isinstance(litellm.callbacks, list):
                 litellm.callbacks.append(self.leastbusy_logger)  # type: ignore
-        elif routing_strategy == "usage-based-routing":
+        elif (
+            routing_strategy == RoutingStrategy.USAGE_BASED_ROUTING.value
+            or routing_strategy == RoutingStrategy.USAGE_BASED_ROUTING
+        ):
             self.lowesttpm_logger = LowestTPMLoggingHandler(
                 router_cache=self.cache,
                 model_list=self.model_list,
@@ -550,7 +562,10 @@ class Router:
             )
             if isinstance(litellm.callbacks, list):
                 litellm.callbacks.append(self.lowesttpm_logger)  # type: ignore
-        elif routing_strategy == "usage-based-routing-v2":
+        elif (
+            routing_strategy == RoutingStrategy.USAGE_BASED_ROUTING_V2.value
+            or routing_strategy == RoutingStrategy.USAGE_BASED_ROUTING_V2
+        ):
             self.lowesttpm_logger_v2 = LowestTPMLoggingHandler_v2(
                 router_cache=self.cache,
                 model_list=self.model_list,
@@ -558,7 +573,10 @@ class Router:
             )
             if isinstance(litellm.callbacks, list):
                 litellm.callbacks.append(self.lowesttpm_logger_v2)  # type: ignore
-        elif routing_strategy == "latency-based-routing":
+        elif (
+            routing_strategy == RoutingStrategy.LATENCY_BASED.value
+            or routing_strategy == RoutingStrategy.LATENCY_BASED
+        ):
             self.lowestlatency_logger = LowestLatencyLoggingHandler(
                 router_cache=self.cache,
                 model_list=self.model_list,
@@ -566,7 +584,10 @@ class Router:
             )
             if isinstance(litellm.callbacks, list):
                 litellm.callbacks.append(self.lowestlatency_logger)  # type: ignore
-        elif routing_strategy == "cost-based-routing":
+        elif (
+            routing_strategy == RoutingStrategy.COST_BASED.value
+            or routing_strategy == RoutingStrategy.COST_BASED
+        ):
             self.lowestcost_logger = LowestCostLoggingHandler(
                 router_cache=self.cache,
                 model_list=self.model_list,
@@ -574,10 +595,14 @@ class Router:
             )
             if isinstance(litellm.callbacks, list):
                 litellm.callbacks.append(self.lowestcost_logger)  # type: ignore
+        else:
+            pass
 
     def print_deployment(self, deployment: dict):
         """
         returns a copy of the deployment with the api key masked
+
+        Only returns 2 characters of the api key and masks the rest with * (10 *).
         """
         try:
             _deployment_copy = copy.deepcopy(deployment)
@@ -1746,7 +1771,6 @@ class Router:
         try:
             kwargs["model"] = model
             kwargs["prompt"] = prompt
-            kwargs["original_function"] = self.text_completion
             kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
             kwargs.get("request_timeout", self.timeout)
             kwargs.setdefault("metadata", {}).update({"model_group": model})
@@ -1770,13 +1794,7 @@ class Router:
             # call via litellm.completion()
             return litellm.text_completion(**{**data, "prompt": prompt, "caching": self.cache_responses, **kwargs})  # type: ignore
         except Exception as e:
-            if self.num_retries > 0:
-                kwargs["model"] = model
-                kwargs["messages"] = messages
-                kwargs["original_function"] = self.text_completion
-                return self.function_with_retries(**kwargs)
-            else:
-                raise e
+            raise e
 
     async def atext_completion(
         self,
@@ -3005,7 +3023,7 @@ class Router:
 
     async def make_call(self, original_function: Any, *args, **kwargs):
         """
-        Handler for making a call to the .completion()/.embeddings() functions.
+        Handler for making a call to the .completion()/.embeddings()/etc. functions.
         """
         model_group = kwargs.get("model")
         response = await original_function(*args, **kwargs)