fix: Minor LiteLLM Fixes + Improvements (29/08/2024) (#5436)

* fix(model_checks.py): support returning wildcard models on `/v1/models`

Fixes https://github.com/BerriAI/litellm/issues/4903

* fix(bedrock_httpx.py): support calling bedrock via api_base

Closes https://github.com/BerriAI/litellm/pull/4587

* fix(litellm_logging.py): only leave last 4 char of gemini key unmasked

Fixes https://github.com/BerriAI/litellm/issues/5433

* feat(router.py): support setting 'weight' param for models on router

Closes https://github.com/BerriAI/litellm/issues/5410

* test(test_bedrock_completion.py): add unit test for custom api base

* fix(model_checks.py): handle no "/" in model
Krish Dholakia 2024-08-29 22:40:25 -07:00 committed by GitHub
parent f70b7575d2
commit dd7b008161
12 changed files with 219 additions and 25 deletions

View file

@@ -279,9 +279,7 @@ class Logging:
                 # Find the position of "key=" in the string
                 key_index = api_base.find("key=") + 4
                 # Mask the last 5 characters after "key="
-                masked_api_base = (
-                    api_base[:key_index] + "*" * 5 + api_base[key_index + 5 :]
-                )
+                masked_api_base = api_base[:key_index] + "*" * 5 + api_base[-4:]
             else:
                 masked_api_base = api_base
             self.model_call_details["litellm_params"]["api_base"] = masked_api_base
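
A minimal standalone sketch of the new masking rule (the Gemini-style URL below is illustrative, not taken from this PR; the masking expression is the one-liner added above):

```python
# Illustrative only: a Gemini-style api_base that carries the key as a query param.
api_base = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent?key=AIza-EXAMPLE-NOT-REAL"

key_index = api_base.find("key=") + 4
# everything after "key=" is replaced, except the last 4 characters
masked_api_base = api_base[:key_index] + "*" * 5 + api_base[-4:]
print(masked_api_base)  # ...?key=*****REAL
```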

View file

@@ -48,13 +48,7 @@ from litellm.types.llms.openai import (
 from litellm.types.utils import Choices
 from litellm.types.utils import GenericStreamingChunk as GChunk
 from litellm.types.utils import Message
-from litellm.utils import (
-    CustomStreamWrapper,
-    ModelResponse,
-    Usage,
-    get_secret,
-    print_verbose,
-)
+from litellm.utils import CustomStreamWrapper, ModelResponse, Usage, get_secret

 from .base import BaseLLM
 from .base_aws_llm import BaseAWSLLM
@@ -654,6 +648,7 @@ class BedrockLLM(BaseAWSLLM):
         self,
         model: str,
         messages: list,
+        api_base: Optional[str],
         custom_prompt_dict: dict,
         model_response: ModelResponse,
         print_verbose: Callable,
@@ -734,7 +729,9 @@ class BedrockLLM(BaseAWSLLM):
         ### SET RUNTIME ENDPOINT ###
         endpoint_url = ""
         env_aws_bedrock_runtime_endpoint = get_secret("AWS_BEDROCK_RUNTIME_ENDPOINT")
-        if aws_bedrock_runtime_endpoint is not None and isinstance(
+        if api_base is not None:
+            endpoint_url = api_base
+        elif aws_bedrock_runtime_endpoint is not None and isinstance(
             aws_bedrock_runtime_endpoint, str
         ):
             endpoint_url = aws_bedrock_runtime_endpoint
@@ -1459,7 +1456,7 @@ class BedrockConverseLLM(BaseAWSLLM):
             client = client  # type: ignore

         try:
-            response = await client.post(api_base, headers=headers, data=data)  # type: ignore
+            response = await client.post(url=api_base, headers=headers, data=data)  # type: ignore
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
@@ -1485,6 +1482,7 @@ class BedrockConverseLLM(BaseAWSLLM):
         self,
         model: str,
         messages: list,
+        api_base: Optional[str],
         custom_prompt_dict: dict,
         model_response: ModelResponse,
         print_verbose: Callable,
@@ -1565,7 +1563,9 @@ class BedrockConverseLLM(BaseAWSLLM):
         ### SET RUNTIME ENDPOINT ###
         endpoint_url = ""
         env_aws_bedrock_runtime_endpoint = get_secret("AWS_BEDROCK_RUNTIME_ENDPOINT")
-        if aws_bedrock_runtime_endpoint is not None and isinstance(
+        if api_base is not None:
+            endpoint_url = api_base
+        elif aws_bedrock_runtime_endpoint is not None and isinstance(
             aws_bedrock_runtime_endpoint, str
         ):
             endpoint_url = aws_bedrock_runtime_endpoint
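
A hedged usage sketch of the new `api_base` path (the gateway URL is a placeholder in the style of the unit test later in this commit; any Bedrock-compatible runtime endpoint should work, and AWS credentials are still read from the environment):

```python
# Sketch: send Bedrock traffic through a custom runtime endpoint via api_base.
import litellm

response = litellm.completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{"role": "user", "content": "What's AWS?"}],
    # placeholder gateway URL; replace with your own Bedrock runtime proxy
    api_base="https://gateway.ai.cloudflare.com/v1/<ACCOUNT_ID>/<GATEWAY>/aws-bedrock/bedrock-runtime/us-east-1",
)
print(response.choices[0].message.content)
```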

View file

@@ -1284,7 +1284,7 @@ class VertexLLM(BaseLLM):
     ) -> Union[ModelResponse, CustomStreamWrapper]:
         request_body = await async_transform_request_body(**data)  # type: ignore

-        if client is None:
+        if client is None or not isinstance(client, AsyncHTTPHandler):
             _params = {}
             if timeout is not None:
                 if isinstance(timeout, float) or isinstance(timeout, int):
@@ -1293,6 +1293,16 @@ class VertexLLM(BaseLLM):
             client = AsyncHTTPHandler(**_params)  # type: ignore
         else:
             client = client  # type: ignore
+        ## LOGGING
+        logging_obj.pre_call(
+            input=messages,
+            api_key="",
+            additional_args={
+                "complete_input_dict": request_body,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )

         try:
             response = await client.post(api_base, headers=headers, json=request_body)  # type: ignore

View file

@@ -2361,6 +2361,7 @@ def completion(
                     timeout=timeout,
                     acompletion=acompletion,
                     client=client,
+                    api_base=api_base,
                 )
             else:
                 response = bedrock_chat_completion.completion(
@@ -2378,6 +2379,7 @@ def completion(
                     timeout=timeout,
                     acompletion=acompletion,
                     client=client,
+                    api_base=api_base,
                 )

             if optional_params.get("stream", False):

View file

@@ -1,9 +1,4 @@
 model_list:
-  - model_name: my-fake-openai-endpoint
+  - model_name: "gemini/*"
     litellm_params:
-      model: gpt-3.5-turbo
-      api_key: "my-fake-key"
-      mock_response: "hello-world"
-
-litellm_settings:
-  ssl_verify: false
+      model: "gemini/*"
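
With the wildcard entry above, `/v1/models` on the proxy should now list the known Gemini models instead of the literal `gemini/*` string. A hedged sketch of checking that (the local port, virtual key, and OpenAI-compatible response shape are assumptions, not values from this PR):

```python
# Sketch: list models from a locally running proxy configured with "gemini/*".
# Assumes the proxy is on 0.0.0.0:4000 and sk-1234 is a valid virtual key.
import requests

resp = requests.get(
    "http://0.0.0.0:4000/v1/models",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()
print([m["id"] for m in resp.json()["data"]])  # e.g. gemini/gemini-1.5-pro, ...
```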

View file

@@ -1,9 +1,41 @@
 # What is this?
 ## Common checks for /v1/models and `/model/info`
 from typing import List, Optional
-from litellm.proxy._types import UserAPIKeyAuth, SpecialModelNames
-from litellm.utils import get_valid_models
+
+import litellm
 from litellm._logging import verbose_proxy_logger
+from litellm.proxy._types import SpecialModelNames, UserAPIKeyAuth
+from litellm.utils import get_valid_models
+
+
+def _check_wildcard_routing(model: str) -> bool:
+    """
+    Returns True if a model is a provider wildcard.
+    """
+    if model == "*":
+        return True
+
+    if "/" in model:
+        llm_provider, potential_wildcard = model.split("/", 1)
+        if (
+            llm_provider in litellm.provider_list and potential_wildcard == "*"
+        ):  # e.g. anthropic/*
+            return True
+
+    return False
+
+
+def get_provider_models(provider: str) -> Optional[List[str]]:
+    """
+    Returns the list of known models by provider
+    """
+    if provider == "*":
+        return get_valid_models()
+
+    if provider in litellm.models_by_provider:
+        return litellm.models_by_provider[provider]
+
+    return None
+
+
 def get_key_models(
@@ -58,6 +90,8 @@ def get_complete_model_list(
     """
     - If key list is empty -> defer to team list
     - If team list is empty -> defer to proxy model list
+
+    If list contains wildcard -> return known provider models
     """

     unique_models = set()
@@ -76,4 +110,18 @@ def get_complete_model_list(
         valid_models = get_valid_models()
         unique_models.update(valid_models)

-    return list(unique_models)
+    models_to_remove = set()
+    all_wildcard_models = []
+    for model in unique_models:
+        if _check_wildcard_routing(model=model):
+            provider = model.split("/")[0]
+            # get all known provider models
+            wildcard_models = get_provider_models(provider=provider)
+            if wildcard_models is not None:
+                models_to_remove.add(model)
+                all_wildcard_models.extend(wildcard_models)
+
+    for model in models_to_remove:
+        unique_models.remove(model)
+
+    return list(unique_models) + all_wildcard_models
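
Boiled down, the expansion above behaves roughly like this standalone helper (the function name is hypothetical; `provider_list`, `models_by_provider`, and `get_valid_models` are the litellm attributes the new code itself relies on):

```python
import litellm
from litellm.utils import get_valid_models


def expand_wildcard_models(models: list) -> list:
    """Replace '*' / 'provider/*' entries with that provider's known models."""
    expanded, kept = [], []
    for model in models:
        if model == "*":
            expanded.extend(get_valid_models())
            continue
        if "/" in model:
            provider, rest = model.split("/", 1)
            if rest == "*" and provider in litellm.provider_list:
                expanded.extend(litellm.models_by_provider.get(provider, []))
                continue
        kept.append(model)
    return kept + expanded


# e.g. expand_wildcard_models(["gemini/*", "gpt-4o"]) -> known gemini models + "gpt-4o"
```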

View file

@@ -4700,6 +4700,31 @@ class Router:
                 )
             elif self.routing_strategy == "simple-shuffle":
                 # if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
+                ############## Check if 'weight' param set for a weighted pick #################
+                weight = (
+                    healthy_deployments[0].get("litellm_params").get("weight", None)
+                )
+                if weight is not None:
+                    # use weight-random pick if rpms provided
+                    weights = [
+                        m["litellm_params"].get("weight", 0)
+                        for m in healthy_deployments
+                    ]
+                    verbose_router_logger.debug(f"\nweight {weights}")
+                    total_weight = sum(weights)
+                    weights = [weight / total_weight for weight in weights]
+                    verbose_router_logger.debug(f"\n weights {weights}")
+                    # Perform weighted random pick
+                    selected_index = random.choices(
+                        range(len(weights)), weights=weights
+                    )[0]
+                    verbose_router_logger.debug(f"\n selected index, {selected_index}")
+                    deployment = healthy_deployments[selected_index]
+                    verbose_router_logger.info(
+                        f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
+                    )
+                    return deployment or deployment[0]
+
                 ############## Check if we can do a RPM/TPM based weighted pick #################
                 rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
                 if rpm is not None:
@@ -4847,6 +4872,25 @@ class Router:
             )
         elif self.routing_strategy == "simple-shuffle":
             # if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
+            ############## Check 'weight' param set for weighted pick #################
+            weight = healthy_deployments[0].get("litellm_params").get("weight", None)
+            if weight is not None:
+                # use weight-random pick if rpms provided
+                weights = [
+                    m["litellm_params"].get("weight", 0) for m in healthy_deployments
+                ]
+                verbose_router_logger.debug(f"\nweight {weights}")
+                total_weight = sum(weights)
+                weights = [weight / total_weight for weight in weights]
+                verbose_router_logger.debug(f"\n weights {weights}")
+                # Perform weighted random pick
+                selected_index = random.choices(range(len(weights)), weights=weights)[0]
+                verbose_router_logger.debug(f"\n selected index, {selected_index}")
+                deployment = healthy_deployments[selected_index]
+                verbose_router_logger.info(
+                    f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
+                )
+                return deployment or deployment[0]
             ############## Check if we can do a RPM/TPM based weighted pick #################
             rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
             if rpm is not None:
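
A hedged usage sketch of the new `weight` param (it mirrors the router test added later in this commit; with the default simple-shuffle strategy the heavier deployment should be picked roughly twice as often, and `mock_response` keeps the sketch runnable without real keys):

```python
# Sketch: split traffic 2:1 across two deployments of the same model alias.
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "weight": 2,
                "mock_response": "Hello world 1!",
            },
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "weight": 1,
                "mock_response": "Hello world 2!",
            },
        },
    ]
)

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello world!"}],
)
print(response.choices[0].message.content)
```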

View file

@@ -960,11 +960,16 @@ async def test_bedrock_extra_headers():
                 messages=[{"role": "user", "content": "What's AWS?"}],
                 client=client,
                 extra_headers={"test": "hello world", "Authorization": "my-test-key"},
+                api_base="https://gateway.ai.cloudflare.com/v1/fa4cdcab1f32b95ca3b53fd36043d691/test/aws-bedrock/bedrock-runtime/us-east-1",
             )
         except Exception as e:
             pass

-        print(f"mock_client_post.call_args: {mock_client_post.call_args}")
+        print(f"mock_client_post.call_args.kwargs: {mock_client_post.call_args.kwargs}")
+        assert (
+            mock_client_post.call_args.kwargs["url"]
+            == "https://gateway.ai.cloudflare.com/v1/fa4cdcab1f32b95ca3b53fd36043d691/test/aws-bedrock/bedrock-runtime/us-east-1/model/anthropic.claude-3-sonnet-20240229-v1:0/converse"
+        )
         assert "test" in mock_client_post.call_args.kwargs["headers"]
         assert mock_client_post.call_args.kwargs["headers"]["test"] == "hello world"
         assert (

View file

@@ -1347,3 +1347,33 @@ def test_logging_async_cache_hit_sync_call():
     assert standard_logging_object["cache_hit"] is True
     assert standard_logging_object["response_cost"] == 0
     assert standard_logging_object["saved_cache_cost"] > 0
+
+
+def test_logging_key_masking_gemini():
+    customHandler = CompletionCustomHandler()
+    litellm.callbacks = [customHandler]
+    litellm.success_callback = []
+
+    with patch.object(
+        customHandler, "log_pre_api_call", new=MagicMock()
+    ) as mock_client:
+        try:
+            resp = litellm.completion(
+                model="gemini/gemini-1.5-pro",
+                messages=[{"role": "user", "content": "Hey, how's it going?"}],
+                api_key="LEAVE_ONLY_LAST_4_CHAR_UNMASKED_THIS_PART",
+            )
+        except litellm.AuthenticationError:
+            pass
+
+        mock_client.assert_called()
+
+        print(f"mock_client.call_args.kwargs: {mock_client.call_args.kwargs}")
+        assert (
+            "LEAVE_ONLY_LAST_4_CHAR_UNMASKED_THIS_PART"
+            not in mock_client.call_args.kwargs["kwargs"]["litellm_params"]["api_base"]
+        )
+
+        key = mock_client.call_args.kwargs["kwargs"]["litellm_params"]["api_base"]
+        trimmed_key = key.split("key=")[1]
+        trimmed_key = trimmed_key.replace("*", "")
+        assert "PART" == trimmed_key

View file

@@ -2342,3 +2342,55 @@ async def test_aaarouter_dynamic_cooldown_message_retry_time(sync_mode):
         assert e.cooldown_time == cooldown_time

     assert exception_raised
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio()
+async def test_router_weighted_pick(sync_mode):
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "weight": 2,
+                    "mock_response": "Hello world 1!",
+                },
+                "model_info": {"id": "1"},
+            },
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "weight": 1,
+                    "mock_response": "Hello world 2!",
+                },
+                "model_info": {"id": "2"},
+            },
+        ]
+    )
+
+    model_id_1_count = 0
+    model_id_2_count = 0
+    for _ in range(50):
+        # make 50 calls. expect model id 1 to be picked more than model id 2
+        if sync_mode:
+            response = router.completion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Hello world!"}],
+            )
+        else:
+            response = await router.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Hello world!"}],
+            )
+        model_id = int(response._hidden_params["model_id"])
+
+        if model_id == 1:
+            model_id_1_count += 1
+        elif model_id == 2:
+            model_id_2_count += 1
+        else:
+            raise Exception("invalid model id returned!")
+
+    assert model_id_1_count > model_id_2_count

View file

@@ -299,6 +299,8 @@ class LiteLLMParamsTypedDict(TypedDict, total=False):
     custom_llm_provider: Optional[str]
     tpm: Optional[int]
     rpm: Optional[int]
+    order: Optional[int]
+    weight: Optional[int]
     api_key: Optional[str]
     api_base: Optional[str]
     api_version: Optional[str]

View file

@@ -7475,6 +7475,14 @@ def exception_type(
                         ),
                         litellm_debug_info=extra_information,
                     )
+                elif "API key not valid." in error_str:
+                    exception_mapping_worked = True
+                    raise AuthenticationError(
+                        message=f"{custom_llm_provider}Exception - {error_str}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
+                    )
                 elif "403" in error_str:
                     exception_mapping_worked = True
                     raise BadRequestError(
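
A hedged sketch of what this mapping means for callers (the key value is a placeholder; catching `litellm.AuthenticationError` on an invalid Gemini key is what the new logging test in this commit relies on, and the `llm_provider`/`model` attributes are assumed from litellm's exception classes):

```python
# Sketch: an invalid Gemini key now surfaces as a typed AuthenticationError.
import litellm

try:
    litellm.completion(
        model="gemini/gemini-1.5-pro",
        messages=[{"role": "user", "content": "hi"}],
        api_key="not-a-real-key",  # placeholder
    )
except litellm.AuthenticationError as e:
    print(f"auth failed for {e.llm_provider}/{e.model}: {e}")
```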