diff --git a/litellm/llms/bedrock/chat/converse_handler.py b/litellm/llms/bedrock/chat/converse_handler.py
index b775cc64c..e47ba4f42 100644
--- a/litellm/llms/bedrock/chat/converse_handler.py
+++ b/litellm/llms/bedrock/chat/converse_handler.py
@@ -19,6 +19,7 @@ from ..common_utils import BedrockError
 from .invoke_handler import AWSEventStreamDecoder, MockResponseIterator, make_call
 
 BEDROCK_CONVERSE_MODELS = [
+    "anthropic.claude-3-5-haiku-20241022-v1:0",
     "anthropic.claude-3-5-sonnet-20241022-v2:0",
     "anthropic.claude-3-5-sonnet-20240620-v1:0",
     "anthropic.claude-3-opus-20240229-v1:0",
diff --git a/litellm/main.py b/litellm/main.py
index f89a6f2e3..8334f35d7 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -4319,9 +4319,9 @@ async def amoderation(
     else:
         _openai_client = openai_client
     if model is not None:
-        response = await openai_client.moderations.create(input=input, model=model)
+        response = await _openai_client.moderations.create(input=input, model=model)
     else:
-        response = await openai_client.moderations.create(input=input)
+        response = await _openai_client.moderations.create(input=input)
 
     return response
 
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 5cf293864..d0bd5f674 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -23,6 +23,31 @@ model_list:
       model: openai/my-fake-model
       api_key: my-fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
+  ## bedrock chat completions
+  - model_name: "*anthropic.claude*"
+    litellm_params:
+      model: bedrock/*anthropic.claude*
+      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/AWS_REGION_NAME
+      guardrailConfig:
+        "guardrailIdentifier": "h4dsqwhp6j66"
+        "guardrailVersion": "2"
+        "trace": "enabled"
+
+## bedrock embeddings
+  - model_name: "*amazon.titan-embed-*"
+    litellm_params:
+      model: bedrock/amazon.titan-embed-*
+      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/AWS_REGION_NAME
+  - model_name: "*cohere.embed-*"
+    litellm_params:
+      model: bedrock/cohere.embed-*
+      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/AWS_REGION_NAME
 
   - model_name: gpt-4
     litellm_params:
@@ -33,6 +58,7 @@ model_list:
       rpm: 480
       timeout: 300
       stream_timeout: 60
+
 # litellm_settings:
 #   fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }]
 #   callbacks: ["otel", "prometheus"]
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 9aebd9071..fd9ef8556 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -436,15 +436,7 @@ class LiteLLM_JWTAuth(LiteLLMBase):
     """
 
     admin_jwt_scope: str = "litellm_proxy_admin"
-    admin_allowed_routes: List[
-        Literal[
-            "openai_routes",
-            "info_routes",
-            "management_routes",
-            "spend_tracking_routes",
-            "global_spend_tracking_routes",
-        ]
-    ] = [
+    admin_allowed_routes: List[str] = [
         "management_routes",
         "spend_tracking_routes",
         "global_spend_tracking_routes",
diff --git a/litellm/proxy/auth/litellm_license.py b/litellm/proxy/auth/litellm_license.py
index 784b4274e..a736a1f5e 100644
--- a/litellm/proxy/auth/litellm_license.py
+++ b/litellm/proxy/auth/litellm_license.py
@@ -5,6 +5,9 @@ import json
 import os
 import traceback
 from datetime import datetime
+from typing import Optional
+
+import httpx
 
 from litellm._logging import verbose_proxy_logger
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
@@ -44,23 +47,46 @@ class LicenseCheck:
             verbose_proxy_logger.error(f"Error reading public key: {str(e)}")
 
     def _verify(self, license_str: str) -> bool:
+
+        verbose_proxy_logger.debug(
+            "litellm.proxy.auth.litellm_license.py::_verify - Checking license against {}/verify_license - {}".format(
+                self.base_url, license_str
+            )
+        )
         url = "{}/verify_license/{}".format(self.base_url, license_str)
+        response: Optional[httpx.Response] = None
         try:
             # don't impact user, if call fails
-            response = self.http_handler.get(url=url)
+            num_retries = 3
+            for i in range(num_retries):
+                try:
+                    response = self.http_handler.get(url=url)
+                    if response is None:
+                        raise Exception("No response from license server")
+                    response.raise_for_status()
+                except httpx.HTTPStatusError:
+                    if i == num_retries - 1:
+                        raise
 
-            response.raise_for_status()
+            if response is None:
+                raise Exception("No response from license server")
 
             response_json = response.json()
 
             premium = response_json["verify"]
 
             assert isinstance(premium, bool)
+
+            verbose_proxy_logger.debug(
+                "litellm.proxy.auth.litellm_license.py::_verify - License={} is premium={}".format(
+                    license_str, premium
+                )
+            )
             return premium
         except Exception as e:
-            verbose_proxy_logger.error(
-                "litellm.proxy.auth.litellm_license.py::_verify - Unable to verify License via api. - {}".format(
-                    str(e)
+            verbose_proxy_logger.exception(
+                "litellm.proxy.auth.litellm_license.py::_verify - Unable to verify License={} via api. - {}".format(
+                    license_str, str(e)
                 )
             )
             return False
@@ -72,7 +98,7 @@ class LicenseCheck:
         """
         try:
             verbose_proxy_logger.debug(
-                "litellm.proxy.auth.litellm_license.py::is_premium() - ENTERING 'IS_PREMIUM' - {}".format(
+                "litellm.proxy.auth.litellm_license.py::is_premium() - ENTERING 'IS_PREMIUM' - LiteLLM License={}".format(
                     self.license_str
                 )
             )
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 1fb628a80..f9f8276c7 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -694,6 +694,9 @@ def run_server(  # noqa: PLR0915
 
         import litellm
 
+        if detailed_debug is True:
+            litellm._turn_on_debug()
+
         # DO NOT DELETE - enables global variables to work across files
         from litellm.proxy.proxy_server import app  # noqa
 
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 94a5bb5e9..8edf2cee3 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3074,6 +3074,15 @@ async def startup_event():
         user_api_key_cache=user_api_key_cache,
     )
 
+    ## CHECK PREMIUM USER
+    verbose_proxy_logger.debug(
+        "litellm.proxy.proxy_server.py::startup() - CHECKING PREMIUM USER - {}".format(
+            premium_user
+        )
+    )
+    if premium_user is False:
+        premium_user = _license_check.is_premium()
+
     ### LOAD CONFIG ###
     worker_config: Optional[Union[str, dict]] = get_secret("WORKER_CONFIG")  # type: ignore
     env_config_yaml: Optional[str] = get_secret_str("CONFIG_FILE_PATH")
@@ -3121,21 +3130,6 @@ async def startup_event():
     if isinstance(worker_config, dict):
         await initialize(**worker_config)
 
-    ## CHECK PREMIUM USER
-    verbose_proxy_logger.debug(
-        "litellm.proxy.proxy_server.py::startup() - CHECKING PREMIUM USER - {}".format(
-            premium_user
-        )
-    )
-    if premium_user is False:
-        premium_user = _license_check.is_premium()
-
-    verbose_proxy_logger.debug(
-        "litellm.proxy.proxy_server.py::startup() - PREMIUM USER value - {}".format(
-            premium_user
-        )
-    )
-
     ProxyStartupEvent._initialize_startup_logging(
         llm_router=llm_router,
         proxy_logging_obj=proxy_logging_obj,
diff --git a/litellm/proxy/route_llm_request.py b/litellm/proxy/route_llm_request.py
index fcf95f6ab..3c5c8b3b4 100644
--- a/litellm/proxy/route_llm_request.py
+++ b/litellm/proxy/route_llm_request.py
@@ -65,6 +65,7 @@ async def route_request(
     """
     Common helper to route the request
     """
+
     router_model_names = llm_router.model_names if llm_router is not None else []
 
     if "api_key" in data or "api_base" in data:
         return getattr(litellm, f"{route_type}")(**data)
diff --git a/litellm/router.py b/litellm/router.py
index 726119cb7..759f94f74 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -556,6 +556,10 @@ class Router:
 
         self.initialize_assistants_endpoint()
 
+        self.amoderation = self.factory_function(
+            litellm.amoderation, call_type="moderation"
+        )
+
     def initialize_assistants_endpoint(self):
         ## INITIALIZE PASS THROUGH ASSISTANTS ENDPOINT ##
         self.acreate_assistants = self.factory_function(litellm.acreate_assistants)
@@ -1683,78 +1687,6 @@ class Router:
             )
             raise e
 
-    async def amoderation(self, model: str, input: str, **kwargs):
-        try:
-            kwargs["model"] = model
-            kwargs["input"] = input
-            kwargs["original_function"] = self._amoderation
-            kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
-            kwargs.get("request_timeout", self.timeout)
-            kwargs.setdefault("metadata", {}).update({"model_group": model})
-
-            response = await self.async_function_with_fallbacks(**kwargs)
-
-            return response
-        except Exception as e:
-            asyncio.create_task(
-                send_llm_exception_alert(
-                    litellm_router_instance=self,
-                    request_kwargs=kwargs,
-                    error_traceback_str=traceback.format_exc(),
-                    original_exception=e,
-                )
-            )
-            raise e
-
-    async def _amoderation(self, model: str, input: str, **kwargs):
-        model_name = None
-        try:
-            verbose_router_logger.debug(
-                f"Inside _moderation()- model: {model}; kwargs: {kwargs}"
-            )
-            deployment = await self.async_get_available_deployment(
-                model=model,
-                input=input,
-                specific_deployment=kwargs.pop("specific_deployment", None),
-            )
-            self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
-            data = deployment["litellm_params"].copy()
-            model_name = data["model"]
-            model_client = self._get_async_openai_model_client(
-                deployment=deployment,
-                kwargs=kwargs,
-            )
-            self.total_calls[model_name] += 1
-
-            timeout: Optional[Union[float, int]] = self._get_timeout(
-                kwargs=kwargs,
-                data=data,
-            )
-
-            response = await litellm.amoderation(
-                **{
-                    **data,
-                    "input": input,
-                    "caching": self.cache_responses,
-                    "client": model_client,
-                    "timeout": timeout,
-                    **kwargs,
-                }
-            )
-
-            self.success_calls[model_name] += 1
-            verbose_router_logger.info(
-                f"litellm.amoderation(model={model_name})\033[32m 200 OK\033[0m"
-            )
-            return response
-        except Exception as e:
-            verbose_router_logger.info(
-                f"litellm.amoderation(model={model_name})\033[31m Exception {str(e)}\033[0m"
-            )
-            if model_name is not None:
-                self.fail_calls[model_name] += 1
-            raise e
-
     async def arerank(self, model: str, **kwargs):
         try:
             kwargs["model"] = model
@@ -2610,20 +2542,46 @@ class Router:
 
         return final_results
 
-    #### ASSISTANTS API ####
+    #### PASSTHROUGH API ####
 
-    def factory_function(self, original_function: Callable):
+    async def _pass_through_moderation_endpoint_factory(
+        self,
+        original_function: Callable,
+        **kwargs,
+    ):
+        if (
+            "model" in kwargs
+            and self.get_model_list(model_name=kwargs["model"]) is not None
+        ):
+            deployment = await self.async_get_available_deployment(
+                model=kwargs["model"]
+            )
+            kwargs["model"] = deployment["litellm_params"]["model"]
+        return await original_function(**kwargs)
+
+    def factory_function(
+        self,
+        original_function: Callable,
+        call_type: Literal["assistants", "moderation"] = "assistants",
+    ):
         async def new_function(
             custom_llm_provider: Optional[Literal["openai", "azure"]] = None,
             client: Optional["AsyncOpenAI"] = None,
             **kwargs,
         ):
-            return await self._pass_through_assistants_endpoint_factory(
-                original_function=original_function,
-                custom_llm_provider=custom_llm_provider,
-                client=client,
-                **kwargs,
-            )
+            if call_type == "assistants":
+                return await self._pass_through_assistants_endpoint_factory(
+                    original_function=original_function,
+                    custom_llm_provider=custom_llm_provider,
+                    client=client,
+                    **kwargs,
+                )
+            elif call_type == "moderation":
+
+                return await self._pass_through_moderation_endpoint_factory(  # type: ignore
+                    original_function=original_function,
+                    **kwargs,
+                )
 
         return new_function
 
@@ -5052,10 +5010,12 @@ class Router:
             )
 
         if len(healthy_deployments) == 0:
-            raise ValueError(
-                "{}. You passed in model={}. There is no 'model_name' with this string ".format(
-                    RouterErrors.no_deployments_available.value, model
-                )
+            raise litellm.BadRequestError(
+                message="You passed in model={}. There is no 'model_name' with this string ".format(
+                    model
+                ),
+                model=model,
+                llm_provider="",
             )
 
         if litellm.model_alias_map and model in litellm.model_alias_map:
diff --git a/litellm/utils.py b/litellm/utils.py
index 6dd0a5009..efda579d6 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1043,6 +1043,7 @@ def client(original_function):  # noqa: PLR0915
             if (
                 call_type != CallTypes.aimage_generation.value  # model optional
                 and call_type != CallTypes.atext_completion.value  # can also be engine
+                and call_type != CallTypes.amoderation.value
             ):
                 raise ValueError("model param not passed in.")
 
diff --git a/tests/local_testing/test_jwt.py b/tests/local_testing/test_jwt.py
index 15ce4192d..ad929ba4f 100644
--- a/tests/local_testing/test_jwt.py
+++ b/tests/local_testing/test_jwt.py
@@ -689,9 +689,10 @@ async def aaaatest_user_token_output(
     assert team_result.user_id == user_id
 
 
+@pytest.mark.parametrize("admin_allowed_routes", [None, ["ui_routes"]])
 @pytest.mark.parametrize("audience", [None, "litellm-proxy"])
 @pytest.mark.asyncio
-async def test_allowed_routes_admin(prisma_client, audience):
+async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_routes):
     """
     Add a check to make sure jwt proxy admin scope can access all allowed admin routes
 
@@ -754,12 +755,17 @@ async def test_allowed_routes_admin(prisma_client, audience):
 
     jwt_handler.user_api_key_cache = cache
 
-    jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(team_id_jwt_field="client_id")
+    if admin_allowed_routes:
+        jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(
+            team_id_jwt_field="client_id", admin_allowed_routes=admin_allowed_routes
+        )
+    else:
+        jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(team_id_jwt_field="client_id")
 
     # VALID TOKEN
     ## GENERATE A TOKEN
     # Assuming the current time is in UTC
-    expiration_time = int((datetime.utcnow() + timedelta(minutes=10)).timestamp())
+    expiration_time = int((datetime.now() + timedelta(minutes=10)).timestamp())
 
     # Generate the JWT token
     # But before, you should convert bytes to string
@@ -777,6 +783,7 @@ async def test_allowed_routes_admin(prisma_client, audience):
 
     # verify token
 
+    print(f"admin_token: {admin_token}")
     response = await jwt_handler.auth_jwt(token=admin_token)
 
     ## RUN IT THROUGH USER API KEY AUTH
diff --git a/tests/local_testing/test_router.py b/tests/local_testing/test_router.py
index 8884f4c3a..cd5e8f6b2 100644
--- a/tests/local_testing/test_router.py
+++ b/tests/local_testing/test_router.py
@@ -1866,16 +1866,9 @@ async def test_router_amoderation():
     router = Router(model_list=model_list)
     ## Test 1: user facing function
     result = await router.amoderation(
-        model="openai-moderations", input="this is valid good text"
+        model="text-moderation-stable", input="this is valid good text"
     )
 
-    ## Test 2: underlying function
-    result = await router._amoderation(
-        model="openai-moderations", input="this is valid good text"
-    )
-
-    print("moderation result", result)
-
 
 def test_router_add_deployment():
     initial_model_list = [
diff --git a/tests/local_testing/test_router_fallbacks.py b/tests/local_testing/test_router_fallbacks.py
index 3e91cd79a..a5149b9fa 100644
--- a/tests/local_testing/test_router_fallbacks.py
+++ b/tests/local_testing/test_router_fallbacks.py
@@ -1226,9 +1226,7 @@ async def test_using_default_fallback(sync_mode):
         pytest.fail(f"Expected call to fail we passed model=openai/foo")
     except Exception as e:
         print("got exception = ", e)
-        from litellm.types.router import RouterErrors
-
-        assert RouterErrors.no_deployments_available.value in str(e)
+        assert "BadRequestError" in str(e)
 
 
 @pytest.mark.parametrize("sync_mode", [False])
diff --git a/tests/local_testing/test_router_pattern_matching.py b/tests/local_testing/test_router_pattern_matching.py
index 9d8c4db0d..2a6f66105 100644
--- a/tests/local_testing/test_router_pattern_matching.py
+++ b/tests/local_testing/test_router_pattern_matching.py
@@ -158,6 +158,46 @@ def test_route_with_exception():
     assert result is None
 
 
+@pytest.mark.asyncio
+async def test_route_with_no_matching_pattern():
+    """
+    Tests that the router returns None when there is no matching pattern
+    """
+    from litellm.types.router import RouterErrors
+
+    router = Router(
+        model_list=[
+            {
+                "model_name": "*meta.llama3*",
+                "litellm_params": {"model": "bedrock/meta.llama3*"},
+            }
+        ]
+    )
+
+    ## WORKS
+    result = await router.acompletion(
+        model="bedrock/meta.llama3-70b",
+        messages=[{"role": "user", "content": "Hello, world!"}],
+        mock_response="Works",
+    )
+    assert result.choices[0].message.content == "Works"
+
+    ## FAILS
+    with pytest.raises(litellm.BadRequestError) as e:
+        await router.acompletion(
+            model="my-fake-model",
+            messages=[{"role": "user", "content": "Hello, world!"}],
+            mock_response="Works",
+        )
+
+    assert RouterErrors.no_deployments_available.value not in str(e.value)
+
+    with pytest.raises(litellm.BadRequestError):
+        await router.aembedding(
+            model="my-fake-model",
+            input="Hello, world!",
+        )
+
 
 def test_router_pattern_match_e2e():
     """
     Tests the end to end flow of the router
@@ -188,3 +228,4 @@ def test_router_pattern_match_e2e():
         "model": "gpt-4o",
         "messages": [{"role": "user", "content": "Hello, how are you?"}],
     }
+
diff --git a/tests/router_unit_tests/test_router_helper_utils.py b/tests/router_unit_tests/test_router_helper_utils.py
index ddd7a502c..cabb4a899 100644
--- a/tests/router_unit_tests/test_router_helper_utils.py
+++ b/tests/router_unit_tests/test_router_helper_utils.py
@@ -999,3 +999,10 @@ def test_pattern_match_deployment_set_model_name(
 
     for model in updated_models:
         assert model["litellm_params"]["model"] == expected_model
+
+@pytest.mark.asyncio
+async def test_pass_through_moderation_endpoint_factory(model_list):
+    router = Router(model_list=model_list)
+    response = await router._pass_through_moderation_endpoint_factory(
+        original_function=litellm.amoderation, input="this is valid good text"
+    )
\ No newline at end of file
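
Usage sketch (illustrative, not part of the patch): with the router-level amoderation now produced by factory_function(litellm.amoderation, call_type="moderation"), a call mirrors the updated test_router_amoderation test. If the model name is not registered in the router's model_list, _pass_through_moderation_endpoint_factory passes it straight through to litellm.amoderation; the model names below are placeholders and an OpenAI API key is assumed to be configured.

# moderation_sketch.py - illustrative only; assumes OPENAI_API_KEY is set
import asyncio

from litellm import Router


async def main() -> None:
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {"model": "gpt-3.5-turbo"},  # placeholder deployment
            }
        ]
    )

    # router.amoderation is built by factory_function(litellm.amoderation,
    # call_type="moderation"). "text-moderation-stable" is not in model_list,
    # so the factory forwards the call directly to litellm.amoderation.
    response = await router.amoderation(
        model="text-moderation-stable", input="this is valid good text"
    )
    print(response)


if __name__ == "__main__":
    asyncio.run(main())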