Patch summary (free text before the first "diff --git" header):
  * litellm/__init__.py: add module-level flag `enable_preview_features` (default False).
  * litellm/proxy/_super_secret_config.yaml: replace the fake OpenAI deployments with one
    Azure embedding deployment and set `enable_preview_features: true`.
  * litellm/router.py: gate Azure region inference on `litellm.enable_preview_features`
    instead of the AUTO_INFER_REGION env var, skip inference when `region_name` is already
    set, and log inference failures at debug level instead of error.
  * litellm/tests/test_router.py: add `test_router_region_pre_call_check`.
  * litellm/utils.py: `_is_region_eu` returns True immediately for the exact string "eu".
NOTE(review): in the utils.py hunk the context loop tests the literal "europe" rather than
  the loop variable `region` - looks unintended; confirm against the full file.
NOTE(review): the new `model_region == "eu"` shortcut is case-sensitive, while the check
  below it lowercases `model_region` - confirm that "EU" is not an expected input.
NOTE(review): the router.py hunk keeps `== True` and a bare `print(...)`; consider
  `if litellm.enable_preview_features:` and the router logger in a follow-up.
NOTE(review): line breaks and indentation were reconstructed from a newline-collapsed copy
  so that every hunk matches its @@ line counts; verify whitespace against the real commit.

diff --git a/litellm/__init__.py b/litellm/__init__.py
index aedf42139..cd4aa5144 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -101,6 +101,9 @@ blocked_user_list: Optional[Union[str, List]] = None
 banned_keywords_list: Optional[Union[str, List]] = None
 llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
 ##################
+### PREVIEW FEATURES ###
+enable_preview_features: bool = False
+##################
 logging: bool = True
 caching: bool = (
     False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index 752cd281d..832e35113 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -1,25 +1,13 @@
 model_list:
 - litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key
-    model: openai/my-fake-model
-  model_name: fake-openai-endpoint
-- litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key-2
-    model: openai/my-fake-model-2
-  model_name: fake-openai-endpoint
-- litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key-3
-    model: openai/my-fake-model-3
-  model_name: fake-openai-endpoint
-- model_name: gpt-4
-  litellm_params:
-    model: gpt-3.5-turbo
-- litellm_params:
-    model: together_ai/codellama/CodeLlama-13b-Instruct-hf
-  model_name: CodeLlama-13b-Instruct
+    api_base: os.environ/AZURE_API_BASE
+    api_key: os.environ/AZURE_API_KEY
+    api_version: 2023-07-01-preview
+    model: azure/azure-embedding-model
+  model_info:
+    base_model: text-embedding-ada-002
+    mode: embedding
+  model_name: text-embedding-ada-002
 
 router_settings:
   redis_host: redis
@@ -28,6 +16,7 @@ router_settings:
 
 litellm_settings:
   set_verbose: True
+  enable_preview_features: true
   # service_callback: ["prometheus_system"]
   # success_callback: ["prometheus"]
   # failure_callback: ["prometheus"]
diff --git a/litellm/router.py b/litellm/router.py
index f0d94908e..e0abc2e3b 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2557,23 +2557,25 @@ class Router:
         # init OpenAI, Azure clients
         self.set_client(model=deployment.to_json(exclude_none=True))
 
-        # set region (if azure model)
-        _auto_infer_region = os.environ.get("AUTO_INFER_REGION", False)
-        if _auto_infer_region == True or _auto_infer_region == "True":
+        # set region (if azure model) ## PREVIEW FEATURE ##
+        if litellm.enable_preview_features == True:
             print("Auto inferring region")  # noqa
             """
             Hiding behind a feature flag
             When there is a large amount of LLM deployments this makes startup times blow up
             """
             try:
-                if "azure" in deployment.litellm_params.model:
+                if (
+                    "azure" in deployment.litellm_params.model
+                    and deployment.litellm_params.region_name is None
+                ):
                     region = litellm.utils.get_model_region(
                         litellm_params=deployment.litellm_params, mode=None
                     )
 
                     deployment.litellm_params.region_name = region
             except Exception as e:
-                verbose_router_logger.error(
+                verbose_router_logger.debug(
                     "Unable to get the region for azure model - {}, {}".format(
                         deployment.litellm_params.model, str(e)
                     )
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 2d277d749..7c59acb79 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -687,6 +687,55 @@ def test_router_context_window_check_pre_call_check_out_group():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 
 
+@pytest.mark.parametrize("allowed_model_region", ["eu", None])
+def test_router_region_pre_call_check(allowed_model_region):
+    """
+    If region based routing set
+    - check if only model in allowed region is allowed by '_pre_call_checks'
+    """
+    model_list = [
+        {
+            "model_name": "gpt-3.5-turbo",  # openai model name
+            "litellm_params": {  # params for litellm completion/embedding call
+                "model": "azure/chatgpt-v-2",
+                "api_key": os.getenv("AZURE_API_KEY"),
+                "api_version": os.getenv("AZURE_API_VERSION"),
+                "api_base": os.getenv("AZURE_API_BASE"),
+                "base_model": "azure/gpt-35-turbo",
+                "region_name": "eu",
+            },
+            "model_info": {"id": "1"},
+        },
+        {
+            "model_name": "gpt-3.5-turbo-large",  # openai model name
+            "litellm_params": {  # params for litellm completion/embedding call
+                "model": "gpt-3.5-turbo-1106",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+            },
+            "model_info": {"id": "2"},
+        },
+    ]
+
+    router = Router(model_list=model_list, enable_pre_call_checks=True)
+
+    _healthy_deployments = router._pre_call_checks(
+        model="gpt-3.5-turbo",
+        healthy_deployments=model_list,
+        messages=[{"role": "user", "content": "Hey!"}],
+        allowed_model_region=allowed_model_region,
+    )
+
+    if allowed_model_region is None:
+        assert len(_healthy_deployments) == 2
+    else:
+        assert len(_healthy_deployments) == 1, "No models selected as healthy"
+        assert (
+            _healthy_deployments[0]["model_info"]["id"] == "1"
+        ), "Incorrect model id picked. Got id={}, expected id=1".format(
+            _healthy_deployments[0]["model_info"]["id"]
+        )
+
+
 ### FUNCTION CALLING
 
 
diff --git a/litellm/utils.py b/litellm/utils.py
index 9218f92a3..1c9c3df92 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5881,6 +5881,9 @@ def calculate_max_parallel_requests(
 
 
 def _is_region_eu(model_region: str) -> bool:
+    if model_region == "eu":
+        return True
+
     EU_Regions = ["europe", "sweden", "switzerland", "france", "uk"]
     for region in EU_Regions:
         if "europe" in model_region.lower():