diff --git a/litellm/llms/bedrock/chat/converse_handler.py b/litellm/llms/bedrock/chat/converse_handler.py
index b775cc64c..e47ba4f42 100644
--- a/litellm/llms/bedrock/chat/converse_handler.py
+++ b/litellm/llms/bedrock/chat/converse_handler.py
@@ -19,6 +19,7 @@ from ..common_utils import BedrockError
 from .invoke_handler import AWSEventStreamDecoder, MockResponseIterator, make_call
 
 BEDROCK_CONVERSE_MODELS = [
+    "anthropic.claude-3-5-haiku-20241022-v1:0",
     "anthropic.claude-3-5-sonnet-20241022-v2:0",
     "anthropic.claude-3-5-sonnet-20240620-v1:0",
     "anthropic.claude-3-opus-20240229-v1:0",
diff --git a/litellm/main.py b/litellm/main.py
index f89a6f2e3..8334f35d7 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -4319,9 +4319,9 @@ async def amoderation(
     else:
         _openai_client = openai_client
     if model is not None:
-        response = await openai_client.moderations.create(input=input, model=model)
+        response = await _openai_client.moderations.create(input=input, model=model)
     else:
-        response = await openai_client.moderations.create(input=input)
+        response = await _openai_client.moderations.create(input=input)
 
     return response
 
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 5cf293864..d0bd5f674 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -23,6 +23,31 @@ model_list:
       model: openai/my-fake-model
       api_key: my-fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
+  ## bedrock chat completions
+  - model_name: "*anthropic.claude*"
+    litellm_params:
+      model: bedrock/*anthropic.claude*
+      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/AWS_REGION_NAME
+      guardrailConfig:
+        "guardrailIdentifier": "h4dsqwhp6j66"
+        "guardrailVersion": "2"
+        "trace": "enabled"
+
+## bedrock embeddings
+  - model_name: "*amazon.titan-embed-*"
+    litellm_params:
+      model: bedrock/amazon.titan-embed-*
+      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/AWS_REGION_NAME
+  - model_name: "*cohere.embed-*"
+    litellm_params:
+      model: bedrock/cohere.embed-*
+      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/AWS_REGION_NAME
 
   - model_name: gpt-4
     litellm_params:
@@ -33,6 +58,7 @@ model_list:
       rpm: 480
       timeout: 300
       stream_timeout: 60
+
 # litellm_settings:
 #   fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }]
 #   callbacks: ["otel", "prometheus"]
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 9aebd9071..fd9ef8556 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -436,15 +436,7 @@ class LiteLLM_JWTAuth(LiteLLMBase):
     """
 
     admin_jwt_scope: str = "litellm_proxy_admin"
-    admin_allowed_routes: List[
-        Literal[
-            "openai_routes",
-            "info_routes",
-            "management_routes",
-            "spend_tracking_routes",
-            "global_spend_tracking_routes",
-        ]
-    ] = [
+    admin_allowed_routes: List[str] = [
         "management_routes",
         "spend_tracking_routes",
         "global_spend_tracking_routes",
diff --git a/litellm/proxy/auth/litellm_license.py b/litellm/proxy/auth/litellm_license.py
index 784b4274e..a736a1f5e 100644
--- a/litellm/proxy/auth/litellm_license.py
+++ b/litellm/proxy/auth/litellm_license.py
@@ -5,6 +5,9 @@ import json
 import os
 import traceback
 from datetime import datetime
+from typing import Optional
+
+import httpx
 
 from litellm._logging import verbose_proxy_logger
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
@@ -44,23 +47,46 @@ class LicenseCheck:
             verbose_proxy_logger.error(f"Error reading public key: {str(e)}")
 
     def _verify(self, license_str: str) -> bool:
+
+        verbose_proxy_logger.debug(
+            "litellm.proxy.auth.litellm_license.py::_verify - Checking license against {}/verify_license - {}".format(
+                self.base_url, license_str
+            )
+        )
         url = "{}/verify_license/{}".format(self.base_url, license_str)
+        response: Optional[httpx.Response] = None
         try:
             # don't impact user, if call fails
-            response = self.http_handler.get(url=url)
+            num_retries = 3
+            for i in range(num_retries):
+                try:
+                    response = self.http_handler.get(url=url)
+                    if response is None:
+                        raise Exception("No response from license server")
+                    response.raise_for_status()
+                except httpx.HTTPStatusError:
+                    if i == num_retries - 1:
+                        raise
 
-            response.raise_for_status()
+            if response is None:
+                raise Exception("No response from license server")
 
             response_json = response.json()
 
             premium = response_json["verify"]
 
             assert isinstance(premium, bool)
+
+            verbose_proxy_logger.debug(
+                "litellm.proxy.auth.litellm_license.py::_verify - License={} is premium={}".format(
+                    license_str, premium
+                )
+            )
             return premium
         except Exception as e:
-            verbose_proxy_logger.error(
-                "litellm.proxy.auth.litellm_license.py::_verify - Unable to verify License via api. - {}".format(
-                    str(e)
+            verbose_proxy_logger.exception(
+                "litellm.proxy.auth.litellm_license.py::_verify - Unable to verify License={} via api. - {}".format(
+                    license_str, str(e)
                 )
             )
             return False
@@ -72,7 +98,7 @@ class LicenseCheck:
         """
         try:
             verbose_proxy_logger.debug(
-                "litellm.proxy.auth.litellm_license.py::is_premium() - ENTERING 'IS_PREMIUM' - {}".format(
+                "litellm.proxy.auth.litellm_license.py::is_premium() - ENTERING 'IS_PREMIUM' - LiteLLM License={}".format(
                     self.license_str
                 )
             )
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 1fb628a80..f9f8276c7 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -694,6 +694,9 @@ def run_server(  # noqa: PLR0915
 
         import litellm
 
+        if detailed_debug is True:
+            litellm._turn_on_debug()
+
         # DO NOT DELETE - enables global variables to work across files
         from litellm.proxy.proxy_server import app  # noqa
 
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 94a5bb5e9..8edf2cee3 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3074,6 +3074,15 @@ async def startup_event():
         user_api_key_cache=user_api_key_cache,
     )
 
+    ## CHECK PREMIUM USER
+    verbose_proxy_logger.debug(
+        "litellm.proxy.proxy_server.py::startup() - CHECKING PREMIUM USER - {}".format(
+            premium_user
+        )
+    )
+    if premium_user is False:
+        premium_user = _license_check.is_premium()
+
     ### LOAD CONFIG ###
     worker_config: Optional[Union[str, dict]] = get_secret("WORKER_CONFIG")  # type: ignore
     env_config_yaml: Optional[str] = get_secret_str("CONFIG_FILE_PATH")
@@ -3121,21 +3130,6 @@ async def startup_event():
     if isinstance(worker_config, dict):
         await initialize(**worker_config)
 
-    ## CHECK PREMIUM USER
-    verbose_proxy_logger.debug(
-        "litellm.proxy.proxy_server.py::startup() - CHECKING PREMIUM USER - {}".format(
-            premium_user
-        )
-    )
-    if premium_user is False:
-        premium_user = _license_check.is_premium()
-
-    verbose_proxy_logger.debug(
-        "litellm.proxy.proxy_server.py::startup() - PREMIUM USER value - {}".format(
-            premium_user
-        )
-    )
-
     ProxyStartupEvent._initialize_startup_logging(
         llm_router=llm_router,
         proxy_logging_obj=proxy_logging_obj,
diff --git a/litellm/proxy/route_llm_request.py b/litellm/proxy/route_llm_request.py
index fcf95f6ab..3c5c8b3b4 100644
--- a/litellm/proxy/route_llm_request.py
+++ b/litellm/proxy/route_llm_request.py
@@ -65,6 +65,7 @@ async def route_request(
     """
     Common helper to route the request
     """
+
     router_model_names = llm_router.model_names if llm_router is not None else []
 
     if "api_key" in data or "api_base" in data:
         return getattr(litellm, f"{route_type}")(**data)
diff --git a/litellm/router.py b/litellm/router.py
index 726119cb7..759f94f74 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -556,6 +556,10 @@ class Router:
 
         self.initialize_assistants_endpoint()
 
+        self.amoderation = self.factory_function(
+            litellm.amoderation, call_type="moderation"
+        )
+
     def initialize_assistants_endpoint(self):
         ## INITIALIZE PASS THROUGH ASSISTANTS ENDPOINT ##
         self.acreate_assistants = self.factory_function(litellm.acreate_assistants)
@@ -1683,78 +1687,6 @@ class Router:
             )
             raise e
 
-    async def amoderation(self, model: str, input: str, **kwargs):
-        try:
-            kwargs["model"] = model
-            kwargs["input"] = input
-            kwargs["original_function"] = self._amoderation
-            kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
-            kwargs.get("request_timeout", self.timeout)
-            kwargs.setdefault("metadata", {}).update({"model_group": model})
-
-            response = await self.async_function_with_fallbacks(**kwargs)
-
-            return response
-        except Exception as e:
-            asyncio.create_task(
-                send_llm_exception_alert(
-                    litellm_router_instance=self,
-                    request_kwargs=kwargs,
-                    error_traceback_str=traceback.format_exc(),
-                    original_exception=e,
-                )
-            )
-            raise e
-
-    async def _amoderation(self, model: str, input: str, **kwargs):
-        model_name = None
-        try:
-            verbose_router_logger.debug(
-                f"Inside _moderation()- model: {model}; kwargs: {kwargs}"
-            )
-            deployment = await self.async_get_available_deployment(
-                model=model,
-                input=input,
-                specific_deployment=kwargs.pop("specific_deployment", None),
-            )
-            self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
-            data = deployment["litellm_params"].copy()
-            model_name = data["model"]
-            model_client = self._get_async_openai_model_client(
-                deployment=deployment,
-                kwargs=kwargs,
-            )
-            self.total_calls[model_name] += 1
-
-            timeout: Optional[Union[float, int]] = self._get_timeout(
-                kwargs=kwargs,
-                data=data,
-            )
-
-            response = await litellm.amoderation(
-                **{
-                    **data,
-                    "input": input,
-                    "caching": self.cache_responses,
-                    "client": model_client,
-                    "timeout": timeout,
-                    **kwargs,
-                }
-            )
-
-            self.success_calls[model_name] += 1
-            verbose_router_logger.info(
-                f"litellm.amoderation(model={model_name})\033[32m 200 OK\033[0m"
-            )
-            return response
-        except Exception as e:
-            verbose_router_logger.info(
-                f"litellm.amoderation(model={model_name})\033[31m Exception {str(e)}\033[0m"
-            )
-            if model_name is not None:
-                self.fail_calls[model_name] += 1
-            raise e
-
     async def arerank(self, model: str, **kwargs):
         try:
             kwargs["model"] = model
@@ -2610,20 +2542,46 @@ class Router:
 
         return final_results
 
-    #### ASSISTANTS API ####
+    #### PASSTHROUGH API ####
 
-    def factory_function(self, original_function: Callable):
+    async def _pass_through_moderation_endpoint_factory(
+        self,
+        original_function: Callable,
+        **kwargs,
+    ):
+        if (
+            "model" in kwargs
+            and self.get_model_list(model_name=kwargs["model"]) is not None
+        ):
+            deployment = await self.async_get_available_deployment(
+                model=kwargs["model"]
+            )
+            kwargs["model"] = deployment["litellm_params"]["model"]
+        return await original_function(**kwargs)
+
+    def factory_function(
+        self,
+        original_function: Callable,
+        call_type: Literal["assistants", "moderation"] = "assistants",
+    ):
         async def new_function(
             custom_llm_provider: Optional[Literal["openai", "azure"]] = None,
             client: Optional["AsyncOpenAI"] = None,
             **kwargs,
         ):
-            return await self._pass_through_assistants_endpoint_factory(
-                original_function=original_function,
-                custom_llm_provider=custom_llm_provider,
-                client=client,
-                **kwargs,
-            )
+            if call_type == "assistants":
+                return await self._pass_through_assistants_endpoint_factory(
+                    original_function=original_function,
+                    custom_llm_provider=custom_llm_provider,
+                    client=client,
+                    **kwargs,
+                )
+            elif call_type == "moderation":
+
+                return await self._pass_through_moderation_endpoint_factory(  # type: ignore
+                    original_function=original_function,
+                    **kwargs,
+                )
 
         return new_function
 
@@ -5052,10 +5010,12 @@ class Router:
             )
 
         if len(healthy_deployments) == 0:
-            raise ValueError(
-                "{}. You passed in model={}. There is no 'model_name' with this string ".format(
-                    RouterErrors.no_deployments_available.value, model
-                )
+            raise litellm.BadRequestError(
+                message="You passed in model={}. There is no 'model_name' with this string ".format(
+                    model
+                ),
+                model=model,
+                llm_provider="",
             )
 
         if litellm.model_alias_map and model in litellm.model_alias_map:
diff --git a/litellm/utils.py b/litellm/utils.py
index 6dd0a5009..efda579d6 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1043,6 +1043,7 @@ def client(original_function):  # noqa: PLR0915
             if (
                 call_type != CallTypes.aimage_generation.value  # model optional
                 and call_type != CallTypes.atext_completion.value  # can also be engine
+                and call_type != CallTypes.amoderation.value
             ):
                 raise ValueError("model param not passed in.")
 
diff --git a/tests/local_testing/test_jwt.py b/tests/local_testing/test_jwt.py
index 15ce4192d..ad929ba4f 100644
--- a/tests/local_testing/test_jwt.py
+++ b/tests/local_testing/test_jwt.py
@@ -689,9 +689,10 @@ async def aaaatest_user_token_output(
     assert team_result.user_id == user_id
 
 
+@pytest.mark.parametrize("admin_allowed_routes", [None, ["ui_routes"]])
 @pytest.mark.parametrize("audience", [None, "litellm-proxy"])
 @pytest.mark.asyncio
-async def test_allowed_routes_admin(prisma_client, audience):
+async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_routes):
     """
     Add a check to make sure jwt proxy admin scope can access all allowed admin routes
 
@@ -754,12 +755,17 @@ async def test_allowed_routes_admin(prisma_client, audience):
 
     jwt_handler.user_api_key_cache = cache
 
-    jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(team_id_jwt_field="client_id")
+    if admin_allowed_routes:
+        jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(
+            team_id_jwt_field="client_id", admin_allowed_routes=admin_allowed_routes
+        )
+    else:
+        jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(team_id_jwt_field="client_id")
 
     # VALID TOKEN
     ## GENERATE A TOKEN
     # Assuming the current time is in UTC
-    expiration_time = int((datetime.utcnow() + timedelta(minutes=10)).timestamp())
+    expiration_time = int((datetime.now() + timedelta(minutes=10)).timestamp())
 
     # Generate the JWT token
     # But before, you should convert bytes to string
@@ -777,6 +783,7 @@ async def test_allowed_routes_admin(prisma_client, audience):
 
     # verify token
 
+    print(f"admin_token: {admin_token}")
     response = await jwt_handler.auth_jwt(token=admin_token)
 
     ## RUN IT THROUGH USER API KEY AUTH
diff --git a/tests/local_testing/test_router.py b/tests/local_testing/test_router.py
index 8884f4c3a..cd5e8f6b2 100644
--- a/tests/local_testing/test_router.py
+++ b/tests/local_testing/test_router.py
@@ -1866,16 +1866,9 @@ async def test_router_amoderation():
     router = Router(model_list=model_list)
     ## Test 1: user facing function
     result = await router.amoderation(
-        model="openai-moderations", input="this is valid good text"
+        model="text-moderation-stable", input="this is valid good text"
     )
 
-    ## Test 2: underlying function
-    result = await router._amoderation(
-        model="openai-moderations", input="this is valid good text"
-    )
-
-    print("moderation result", result)
-
 
 def test_router_add_deployment():
     initial_model_list = [
diff --git a/tests/local_testing/test_router_fallbacks.py b/tests/local_testing/test_router_fallbacks.py
index 3e91cd79a..a5149b9fa 100644
--- a/tests/local_testing/test_router_fallbacks.py
+++ b/tests/local_testing/test_router_fallbacks.py
@@ -1226,9 +1226,7 @@ async def test_using_default_fallback(sync_mode):
         pytest.fail(f"Expected call to fail we passed model=openai/foo")
     except Exception as e:
         print("got exception = ", e)
-        from litellm.types.router import RouterErrors
-
-        assert RouterErrors.no_deployments_available.value in str(e)
+        assert "BadRequestError" in str(e)
 
 
 @pytest.mark.parametrize("sync_mode", [False])
diff --git a/tests/local_testing/test_router_pattern_matching.py b/tests/local_testing/test_router_pattern_matching.py
index 9d8c4db0d..2a6f66105 100644
--- a/tests/local_testing/test_router_pattern_matching.py
+++ b/tests/local_testing/test_router_pattern_matching.py
@@ -158,6 +158,46 @@ def test_route_with_exception():
     assert result is None
 
 
+@pytest.mark.asyncio
+async def test_route_with_no_matching_pattern():
+    """
+    Tests that the router returns None when there is no matching pattern
+    """
+    from litellm.types.router import RouterErrors
+
+    router = Router(
+        model_list=[
+            {
+                "model_name": "*meta.llama3*",
+                "litellm_params": {"model": "bedrock/meta.llama3*"},
+            }
+        ]
+    )
+
+    ## WORKS
+    result = await router.acompletion(
+        model="bedrock/meta.llama3-70b",
+        messages=[{"role": "user", "content": "Hello, world!"}],
+        mock_response="Works",
+    )
+    assert result.choices[0].message.content == "Works"
+
+    ## FAILS
+    with pytest.raises(litellm.BadRequestError) as e:
+        await router.acompletion(
+            model="my-fake-model",
+            messages=[{"role": "user", "content": "Hello, world!"}],
+            mock_response="Works",
+        )
+
+    assert RouterErrors.no_deployments_available.value not in str(e.value)
+
+    with pytest.raises(litellm.BadRequestError):
+        await router.aembedding(
+            model="my-fake-model",
+            input="Hello, world!",
+        )
+
 
 def test_router_pattern_match_e2e():
     """
     Tests the end to end flow of the router
@@ -188,3 +228,4 @@ def test_router_pattern_match_e2e():
         "model": "gpt-4o",
         "messages": [{"role": "user", "content": "Hello, how are you?"}],
     }
+
diff --git a/tests/router_unit_tests/test_router_helper_utils.py b/tests/router_unit_tests/test_router_helper_utils.py
index ddd7a502c..cabb4a899 100644
--- a/tests/router_unit_tests/test_router_helper_utils.py
+++ b/tests/router_unit_tests/test_router_helper_utils.py
@@ -999,3 +999,10 @@ def test_pattern_match_deployment_set_model_name(
 
     for model in updated_models:
         assert model["litellm_params"]["model"] == expected_model
+
+@pytest.mark.asyncio
+async def test_pass_through_moderation_endpoint_factory(model_list):
+    router = Router(model_list=model_list)
+    response = await router._pass_through_moderation_endpoint_factory(
+        original_function=litellm.amoderation, input="this is valid good text"
+    )
\ No newline at end of file
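
Usage sketch (illustrative, not part of the patch): with the router-level amoderation now produced by factory_function(litellm.amoderation, call_type="moderation"), a call mirrors the updated test_router_amoderation test. If the model name is not registered in the router's model_list, _pass_through_moderation_endpoint_factory passes it straight through to litellm.amoderation; the model names below are placeholders and an OpenAI API key is assumed to be configured.

# moderation_sketch.py - illustrative only; assumes OPENAI_API_KEY is set
import asyncio

from litellm import Router


async def main() -> None:
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {"model": "gpt-3.5-turbo"},  # placeholder deployment
            }
        ]
    )

    # router.amoderation is built by factory_function(litellm.amoderation,
    # call_type="moderation"). "text-moderation-stable" is not in model_list,
    # so the factory forwards the call directly to litellm.amoderation.
    response = await router.amoderation(
        model="text-moderation-stable", input="this is valid good text"
    )
    print(response)


if __name__ == "__main__":
    asyncio.run(main())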