LiteLLM Minor Fixes & Improvements (11/06/2024) (#6624)

* refactor(proxy_server.py): add debug logging around license check event (refactor position in startup_event logic) * fix(proxy/_types.py): allow admin_allowed_routes to be any str * fix(router.py): raise 400-status code error for no 'model_name' error on router Fixes issue with status code when unknown model name passed with pattern matching enabled * fix(converse_handler.py): add claude 3-5 haiku to bedrock converse models * test: update testing to replace claude-instant-1.2 * fix(router.py): fix router.moderation calls * test: update test to remove claude-instant-1 * fix(router.py): support model_list values in router.moderation * test: fix test * test: fix test
2025-04-25 10:44:24 +00:00 · 2024-11-07 04:37:32 +05:30 · 2024-11-07 04:37:32 +05:30 · 0c204d33bc
commit 0c204d33bc
parent 136693cac4
15 changed files with 180 additions and 130 deletions
--- a/litellm/llms/bedrock/chat/converse_handler.py
+++ b/litellm/llms/bedrock/chat/converse_handler.py
@ -19,6 +19,7 @@ from ..common_utils import BedrockError
 from .invoke_handler import AWSEventStreamDecoder, MockResponseIterator, make_call

 BEDROCK_CONVERSE_MODELS = [
+    "anthropic.claude-3-5-haiku-20241022-v1:0",
    "anthropic.claude-3-5-sonnet-20241022-v2:0",
    "anthropic.claude-3-5-sonnet-20240620-v1:0",
    "anthropic.claude-3-opus-20240229-v1:0",
--- a/litellm/main.py
+++ b/litellm/main.py
@ -4319,9 +4319,9 @@ async def amoderation(
    else:
        _openai_client = openai_client
    if model is not None:
-        response = await openai_client.moderations.create(input=input, model=model)
+        response = await _openai_client.moderations.create(input=input, model=model)
    else:
-        response = await openai_client.moderations.create(input=input)
+        response = await _openai_client.moderations.create(input=input)
    return response


--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@ -23,6 +23,31 @@ model_list:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+  ## bedrock chat completions
+  - model_name: "*anthropic.claude*"
+    litellm_params:
+      model: bedrock/*anthropic.claude*
+      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/AWS_REGION_NAME
+      guardrailConfig:
+        "guardrailIdentifier": "h4dsqwhp6j66"
+        "guardrailVersion": "2"
+        "trace": "enabled"
+        
+## bedrock embeddings
+  - model_name: "*amazon.titan-embed-*"
+    litellm_params:
+      model: bedrock/amazon.titan-embed-*
+      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/AWS_REGION_NAME
+  - model_name: "*cohere.embed-*"
+    litellm_params:
+      model: bedrock/cohere.embed-*
+      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/AWS_REGION_NAME

  - model_name: gpt-4
    litellm_params:
@ -33,6 +58,7 @@ model_list:
      rpm: 480
      timeout: 300
      stream_timeout: 60
+
 # litellm_settings:
 #   fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }]
 #   callbacks: ["otel", "prometheus"]
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@ -436,15 +436,7 @@ class LiteLLM_JWTAuth(LiteLLMBase):
    """

    admin_jwt_scope: str = "litellm_proxy_admin"
-    admin_allowed_routes: List[
-        Literal[
-            "openai_routes",
-            "info_routes",
-            "management_routes",
-            "spend_tracking_routes",
-            "global_spend_tracking_routes",
-        ]
-    ] = [
+    admin_allowed_routes: List[str] = [
        "management_routes",
        "spend_tracking_routes",
        "global_spend_tracking_routes",
--- a/litellm/proxy/auth/litellm_license.py
+++ b/litellm/proxy/auth/litellm_license.py
@ -5,6 +5,9 @@ import json
 import os
 import traceback
 from datetime import datetime
+from typing import Optional
+
+import httpx

 from litellm._logging import verbose_proxy_logger
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
@ -44,23 +47,46 @@ class LicenseCheck:
            verbose_proxy_logger.error(f"Error reading public key: {str(e)}")

    def _verify(self, license_str: str) -> bool:
+
+        verbose_proxy_logger.debug(
+            "litellm.proxy.auth.litellm_license.py::_verify - Checking license against {}/verify_license - {}".format(
+                self.base_url, license_str
+            )
+        )
        url = "{}/verify_license/{}".format(self.base_url, license_str)

+        response: Optional[httpx.Response] = None
        try:  # don't impact user, if call fails
+            num_retries = 3
+            for i in range(num_retries):
+                try:
                    response = self.http_handler.get(url=url)
-
+                    if response is None:
+                        raise Exception("No response from license server")
                    response.raise_for_status()
+                except httpx.HTTPStatusError:
+                    if i == num_retries - 1:
+                        raise
+
+            if response is None:
+                raise Exception("No response from license server")

            response_json = response.json()

            premium = response_json["verify"]

            assert isinstance(premium, bool)
+
+            verbose_proxy_logger.debug(
+                "litellm.proxy.auth.litellm_license.py::_verify - License={} is premium={}".format(
+                    license_str, premium
+                )
+            )
            return premium
        except Exception as e:
-            verbose_proxy_logger.error(
-                "litellm.proxy.auth.litellm_license.py::_verify - Unable to verify License via api. - {}".format(
-                    str(e)
+            verbose_proxy_logger.exception(
+                "litellm.proxy.auth.litellm_license.py::_verify - Unable to verify License={} via api. - {}".format(
+                    license_str, str(e)
                )
            )
            return False
@ -72,7 +98,7 @@ class LicenseCheck:
        """
        try:
            verbose_proxy_logger.debug(
-                "litellm.proxy.auth.litellm_license.py::is_premium() - ENTERING 'IS_PREMIUM' - {}".format(
+                "litellm.proxy.auth.litellm_license.py::is_premium() - ENTERING 'IS_PREMIUM' - LiteLLM License={}".format(
                    self.license_str
                )
            )
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@ -694,6 +694,9 @@ def run_server(  # noqa: PLR0915

        import litellm

+        if detailed_debug is True:
+            litellm._turn_on_debug()
+
        # DO NOT DELETE - enables global variables to work across files
        from litellm.proxy.proxy_server import app  # noqa

--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -3074,6 +3074,15 @@ async def startup_event():
            user_api_key_cache=user_api_key_cache,
        )

+    ## CHECK PREMIUM USER
+    verbose_proxy_logger.debug(
+        "litellm.proxy.proxy_server.py::startup() - CHECKING PREMIUM USER - {}".format(
+            premium_user
+        )
+    )
+    if premium_user is False:
+        premium_user = _license_check.is_premium()
+
    ### LOAD CONFIG ###
    worker_config: Optional[Union[str, dict]] = get_secret("WORKER_CONFIG")  # type: ignore
    env_config_yaml: Optional[str] = get_secret_str("CONFIG_FILE_PATH")
@ -3121,21 +3130,6 @@ async def startup_event():
            if isinstance(worker_config, dict):
                await initialize(**worker_config)

-    ## CHECK PREMIUM USER
-    verbose_proxy_logger.debug(
-        "litellm.proxy.proxy_server.py::startup() - CHECKING PREMIUM USER - {}".format(
-            premium_user
-        )
-    )
-    if premium_user is False:
-        premium_user = _license_check.is_premium()
-
-    verbose_proxy_logger.debug(
-        "litellm.proxy.proxy_server.py::startup() - PREMIUM USER value - {}".format(
-            premium_user
-        )
-    )
-
    ProxyStartupEvent._initialize_startup_logging(
        llm_router=llm_router,
        proxy_logging_obj=proxy_logging_obj,
--- a/litellm/proxy/route_llm_request.py
+++ b/litellm/proxy/route_llm_request.py
@ -65,6 +65,7 @@ async def route_request(
    Common helper to route the request

    """
+
    router_model_names = llm_router.model_names if llm_router is not None else []
    if "api_key" in data or "api_base" in data:
        return getattr(litellm, f"{route_type}")(**data)
--- a/litellm/router.py
+++ b/litellm/router.py
@ -556,6 +556,10 @@ class Router:

        self.initialize_assistants_endpoint()

+        self.amoderation = self.factory_function(
+            litellm.amoderation, call_type="moderation"
+        )
+
    def initialize_assistants_endpoint(self):
        ## INITIALIZE PASS THROUGH ASSISTANTS ENDPOINT ##
        self.acreate_assistants = self.factory_function(litellm.acreate_assistants)
@ -1683,78 +1687,6 @@ class Router:
            )
            raise e

-    async def amoderation(self, model: str, input: str, **kwargs):
-        try:
-            kwargs["model"] = model
-            kwargs["input"] = input
-            kwargs["original_function"] = self._amoderation
-            kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
-            kwargs.get("request_timeout", self.timeout)
-            kwargs.setdefault("metadata", {}).update({"model_group": model})
-
-            response = await self.async_function_with_fallbacks(**kwargs)
-
-            return response
-        except Exception as e:
-            asyncio.create_task(
-                send_llm_exception_alert(
-                    litellm_router_instance=self,
-                    request_kwargs=kwargs,
-                    error_traceback_str=traceback.format_exc(),
-                    original_exception=e,
-                )
-            )
-            raise e
-
-    async def _amoderation(self, model: str, input: str, **kwargs):
-        model_name = None
-        try:
-            verbose_router_logger.debug(
-                f"Inside _moderation()- model: {model}; kwargs: {kwargs}"
-            )
-            deployment = await self.async_get_available_deployment(
-                model=model,
-                input=input,
-                specific_deployment=kwargs.pop("specific_deployment", None),
-            )
-            self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
-            data = deployment["litellm_params"].copy()
-            model_name = data["model"]
-            model_client = self._get_async_openai_model_client(
-                deployment=deployment,
-                kwargs=kwargs,
-            )
-            self.total_calls[model_name] += 1
-
-            timeout: Optional[Union[float, int]] = self._get_timeout(
-                kwargs=kwargs,
-                data=data,
-            )
-
-            response = await litellm.amoderation(
-                **{
-                    **data,
-                    "input": input,
-                    "caching": self.cache_responses,
-                    "client": model_client,
-                    "timeout": timeout,
-                    **kwargs,
-                }
-            )
-
-            self.success_calls[model_name] += 1
-            verbose_router_logger.info(
-                f"litellm.amoderation(model={model_name})\033[32m 200 OK\033[0m"
-            )
-            return response
-        except Exception as e:
-            verbose_router_logger.info(
-                f"litellm.amoderation(model={model_name})\033[31m Exception {str(e)}\033[0m"
-            )
-            if model_name is not None:
-                self.fail_calls[model_name] += 1
-            raise e
-
    async def arerank(self, model: str, **kwargs):
        try:
            kwargs["model"] = model
@ -2610,20 +2542,46 @@ class Router:

        return final_results

-    #### ASSISTANTS API ####
+    #### PASSTHROUGH API ####

-    def factory_function(self, original_function: Callable):
+    async def _pass_through_moderation_endpoint_factory(
+        self,
+        original_function: Callable,
+        **kwargs,
+    ):
+        if (
+            "model" in kwargs
+            and self.get_model_list(model_name=kwargs["model"]) is not None
+        ):
+            deployment = await self.async_get_available_deployment(
+                model=kwargs["model"]
+            )
+            kwargs["model"] = deployment["litellm_params"]["model"]
+        return await original_function(**kwargs)
+
+    def factory_function(
+        self,
+        original_function: Callable,
+        call_type: Literal["assistants", "moderation"] = "assistants",
+    ):
        async def new_function(
            custom_llm_provider: Optional[Literal["openai", "azure"]] = None,
            client: Optional["AsyncOpenAI"] = None,
            **kwargs,
        ):
+            if call_type == "assistants":
                return await self._pass_through_assistants_endpoint_factory(
                    original_function=original_function,
                    custom_llm_provider=custom_llm_provider,
                    client=client,
                    **kwargs,
                )
+            elif call_type == "moderation":
+
+                return await self._pass_through_moderation_endpoint_factory(  # type: ignore
+                    original_function=original_function,
+                    **kwargs,
+                )

        return new_function

@ -5052,10 +5010,12 @@ class Router:
        )

        if len(healthy_deployments) == 0:
-            raise ValueError(
-                "{}. You passed in model={}. There is no 'model_name' with this string ".format(
-                    RouterErrors.no_deployments_available.value, model
-                )
+            raise litellm.BadRequestError(
+                message="You passed in model={}. There is no 'model_name' with this string ".format(
+                    model
+                ),
+                model=model,
+                llm_provider="",
            )

        if litellm.model_alias_map and model in litellm.model_alias_map:
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -1043,6 +1043,7 @@ def client(original_function):  # noqa: PLR0915
            if (
                call_type != CallTypes.aimage_generation.value  # model optional
                and call_type != CallTypes.atext_completion.value  # can also be engine
+                and call_type != CallTypes.amoderation.value
            ):
                raise ValueError("model param not passed in.")

--- a/tests/local_testing/test_jwt.py
+++ b/tests/local_testing/test_jwt.py
@ -689,9 +689,10 @@ async def aaaatest_user_token_output(
    assert team_result.user_id == user_id


+@pytest.mark.parametrize("admin_allowed_routes", [None, ["ui_routes"]])
@pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio
-async def test_allowed_routes_admin(prisma_client, audience):
+async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_routes):
    """
    Add a check to make sure jwt proxy admin scope can access all allowed admin routes

@ -754,12 +755,17 @@ async def test_allowed_routes_admin(prisma_client, audience):

    jwt_handler.user_api_key_cache = cache

+    if admin_allowed_routes:
+        jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(
+            team_id_jwt_field="client_id", admin_allowed_routes=admin_allowed_routes
+        )
+    else:
        jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(team_id_jwt_field="client_id")

    # VALID TOKEN
    ## GENERATE A TOKEN
    # Assuming the current time is in UTC
-    expiration_time = int((datetime.utcnow() + timedelta(minutes=10)).timestamp())
+    expiration_time = int((datetime.now() + timedelta(minutes=10)).timestamp())

    # Generate the JWT token
    # But before, you should convert bytes to string
@ -777,6 +783,7 @@ async def test_allowed_routes_admin(prisma_client, audience):

    # verify token

+    print(f"admin_token: {admin_token}")
    response = await jwt_handler.auth_jwt(token=admin_token)

    ## RUN IT THROUGH USER API KEY AUTH
--- a/tests/local_testing/test_router.py
+++ b/tests/local_testing/test_router.py
@ -1866,16 +1866,9 @@ async def test_router_amoderation():
    router = Router(model_list=model_list)
    ## Test 1: user facing function
    result = await router.amoderation(
-        model="openai-moderations", input="this is valid good text"
+        model="text-moderation-stable", input="this is valid good text"
    )

-    ## Test 2: underlying function
-    result = await router._amoderation(
-        model="openai-moderations", input="this is valid good text"
-    )
-
-    print("moderation result", result)
-

 def test_router_add_deployment():
    initial_model_list = [
--- a/tests/local_testing/test_router_fallbacks.py
+++ b/tests/local_testing/test_router_fallbacks.py
@ -1226,9 +1226,7 @@ async def test_using_default_fallback(sync_mode):
        pytest.fail(f"Expected call to fail we passed model=openai/foo")
    except Exception as e:
        print("got exception = ", e)
-        from litellm.types.router import RouterErrors
-
-        assert RouterErrors.no_deployments_available.value in str(e)
+        assert "BadRequestError" in str(e)


@pytest.mark.parametrize("sync_mode", [False])
--- a/tests/local_testing/test_router_pattern_matching.py
+++ b/tests/local_testing/test_router_pattern_matching.py
@ -158,6 +158,46 @@ def test_route_with_exception():
    assert result is None


+@pytest.mark.asyncio
+async def test_route_with_no_matching_pattern():
+    """
+    Tests that the router returns None when there is no matching pattern
+    """
+    from litellm.types.router import RouterErrors
+
+    router = Router(
+        model_list=[
+            {
+                "model_name": "*meta.llama3*",
+                "litellm_params": {"model": "bedrock/meta.llama3*"},
+            }
+        ]
+    )
+
+    ## WORKS
+    result = await router.acompletion(
+        model="bedrock/meta.llama3-70b",
+        messages=[{"role": "user", "content": "Hello, world!"}],
+        mock_response="Works",
+    )
+    assert result.choices[0].message.content == "Works"
+
+    ## FAILS
+    with pytest.raises(litellm.BadRequestError) as e:
+        await router.acompletion(
+            model="my-fake-model",
+            messages=[{"role": "user", "content": "Hello, world!"}],
+            mock_response="Works",
+        )
+
+    assert RouterErrors.no_deployments_available.value not in str(e.value)
+
+    with pytest.raises(litellm.BadRequestError):
+        await router.aembedding(
+            model="my-fake-model",
+            input="Hello, world!",
+        )
+
 def test_router_pattern_match_e2e():
    """
    Tests the end to end flow of the router
@ -188,3 +228,4 @@ def test_router_pattern_match_e2e():
            "model": "gpt-4o",
            "messages": [{"role": "user", "content": "Hello, how are you?"}],
        }
+
--- a/tests/router_unit_tests/test_router_helper_utils.py
+++ b/tests/router_unit_tests/test_router_helper_utils.py
@ -999,3 +999,10 @@ def test_pattern_match_deployment_set_model_name(

    for model in updated_models:
        assert model["litellm_params"]["model"] == expected_model
+
+@pytest.mark.asyncio
+async def test_pass_through_moderation_endpoint_factory(model_list):
+    router = Router(model_list=model_list)
+    response = await router._pass_through_moderation_endpoint_factory(
+        original_function=litellm.amoderation, input="this is valid good text"
+    )