diff --git a/tests/local_testing/test_router_init.py b/tests/local_testing/test_router_init.py
index 4fce5cbfcc..00b2daa764 100644
--- a/tests/local_testing/test_router_init.py
+++ b/tests/local_testing/test_router_init.py
@@ -1,704 +1,704 @@
-# this tests if the router is initialized correctly
-import asyncio
-import os
-import sys
-import time
-import traceback
-
-import pytest
-
-sys.path.insert(
-    0, os.path.abspath("../..")
-)  # Adds the parent directory to the system path
-from collections import defaultdict
-from concurrent.futures import ThreadPoolExecutor
-
-from dotenv import load_dotenv
-
-import litellm
-from litellm import Router
-
-load_dotenv()
-
-# every time we load the router we should have 4 clients:
-# Async
-# Sync
-# Async + Stream
-# Sync + Stream
-
-
-def test_init_clients():
-    litellm.set_verbose = True
-    import logging
-
-    from litellm._logging import verbose_router_logger
-
-    verbose_router_logger.setLevel(logging.DEBUG)
-    try:
-        print("testing init 4 clients with diff timeouts")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "timeout": 0.01,
-                    "stream_timeout": 0.000_001,
-                    "max_retries": 7,
-                },
-            },
-        ]
-        router = Router(model_list=model_list, set_verbose=True)
-        for elem in router.model_list:
-            model_id = elem["model_info"]["id"]
-            assert router.cache.get_cache(f"{model_id}_client") is not None
-            assert router.cache.get_cache(f"{model_id}_async_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
-
-            # check if timeout for stream/non stream clients is set correctly
-            async_client = router.cache.get_cache(f"{model_id}_async_client")
-            stream_async_client = router.cache.get_cache(
-                f"{model_id}_stream_async_client"
-            )
-
-            assert async_client.timeout == 0.01
-            assert stream_async_client.timeout == 0.000_001
-            print(vars(async_client))
-            print()
-            print(async_client._base_url)
-            assert (
-                async_client._base_url
-                == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/"
-            )
-            assert (
-                stream_async_client._base_url
-                == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/"
-            )
-
-        print("PASSED !")
-
-    except Exception as e:
-        traceback.print_exc()
-        pytest.fail(f"Error occurred: {e}")
-
-
-# test_init_clients()
-
-
-def test_init_clients_basic():
-    litellm.set_verbose = True
-    try:
-        print("Test basic client init")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                },
-            },
-        ]
-        router = Router(model_list=model_list)
-        for elem in router.model_list:
-            model_id = elem["model_info"]["id"]
-            assert router.cache.get_cache(f"{model_id}_client") is not None
-            assert router.cache.get_cache(f"{model_id}_async_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
-        print("PASSED !")
-
-        # see if we can init clients without timeout or max retries set
-    except Exception as e:
-        traceback.print_exc()
-        pytest.fail(f"Error occurred: {e}")
-
-
-# test_init_clients_basic()
-
-
-def test_init_clients_basic_azure_cloudflare():
-    # init azure + cloudflare
-    # init OpenAI gpt-3.5
-    # init OpenAI text-embedding
-    # init OpenAI comptaible - Mistral/mistral-medium
-    # init OpenAI compatible - xinference/bge
-    litellm.set_verbose = True
-    try:
-        print("Test basic client init")
-        model_list = [
-            {
-                "model_name": "azure-cloudflare",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1",
-                },
-            },
-            {
-                "model_name": "gpt-openai",
-                "litellm_params": {
-                    "model": "gpt-3.5-turbo",
-                    "api_key": os.getenv("OPENAI_API_KEY"),
-                },
-            },
-            {
-                "model_name": "text-embedding-ada-002",
-                "litellm_params": {
-                    "model": "text-embedding-ada-002",
-                    "api_key": os.getenv("OPENAI_API_KEY"),
-                },
-            },
-            {
-                "model_name": "mistral",
-                "litellm_params": {
-                    "model": "mistral/mistral-tiny",
-                    "api_key": os.getenv("MISTRAL_API_KEY"),
-                },
-            },
-            {
-                "model_name": "bge-base-en",
-                "litellm_params": {
-                    "model": "xinference/bge-base-en",
-                    "api_base": "http://127.0.0.1:9997/v1",
-                    "api_key": os.getenv("OPENAI_API_KEY"),
-                },
-            },
-        ]
-        router = Router(model_list=model_list)
-        for elem in router.model_list:
-            model_id = elem["model_info"]["id"]
-            assert router.cache.get_cache(f"{model_id}_client") is not None
-            assert router.cache.get_cache(f"{model_id}_async_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
-        print("PASSED !")
-
-        # see if we can init clients without timeout or max retries set
-    except Exception as e:
-        traceback.print_exc()
-        pytest.fail(f"Error occurred: {e}")
-
-
-# test_init_clients_basic_azure_cloudflare()
-
-
-def test_timeouts_router():
-    """
-    Test the timeouts of the router with multiple clients. This HASas to raise a timeout error
-    """
-    import openai
-
-    litellm.set_verbose = True
-    try:
-        print("testing init 4 clients with diff timeouts")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "timeout": 0.000001,
-                    "stream_timeout": 0.000_001,
-                },
-            },
-        ]
-        router = Router(model_list=model_list, num_retries=0)
-
-        print("PASSED !")
-
-        async def test():
-            try:
-                await router.acompletion(
-                    model="gpt-3.5-turbo",
-                    messages=[
-                        {"role": "user", "content": "hello, write a 20 pg essay"}
-                    ],
-                )
-            except Exception as e:
-                raise e
-
-        asyncio.run(test())
-    except openai.APITimeoutError as e:
-        print(
-            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
-        )
-        print(type(e))
-        pass
-    except Exception as e:
-        pytest.fail(
-            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
-        )
-
-
-# test_timeouts_router()
-
-
-def test_stream_timeouts_router():
-    """
-    Test the stream timeouts router. See if it selected the correct client with stream timeout
-    """
-    import openai
-
-    litellm.set_verbose = True
-    try:
-        print("testing init 4 clients with diff timeouts")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "timeout": 200,  # regular calls will not timeout, stream calls will
-                    "stream_timeout": 10,
-                },
-            },
-        ]
-        router = Router(model_list=model_list)
-
-        print("PASSED !")
-        data = {
-            "model": "gpt-3.5-turbo",
-            "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
-            "stream": True,
-        }
-        selected_client = router._get_client(
-            deployment=router.model_list[0],
-            kwargs=data,
-            client_type=None,
-        )
-        print("Select client timeout", selected_client.timeout)
-        assert selected_client.timeout == 10
-
-        # make actual call
-        response = router.completion(**data)
-
-        for chunk in response:
-            print(f"chunk: {chunk}")
-    except openai.APITimeoutError as e:
-        print(
-            "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
-        )
-        print(type(e))
-        pass
-    except Exception as e:
-        pytest.fail(
-            f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
-        )
-
-
-# test_stream_timeouts_router()
-
-
-def test_xinference_embedding():
-    # [Test Init Xinference] this tests if we init xinference on the router correctly
-    # [Test Exception Mapping] tests that xinference is an openai comptiable provider
-    print("Testing init xinference")
-    print(
-        "this tests if we create an OpenAI client for Xinference, with the correct API BASE"
-    )
-
-    model_list = [
-        {
-            "model_name": "xinference",
-            "litellm_params": {
-                "model": "xinference/bge-base-en",
-                "api_base": "os.environ/XINFERENCE_API_BASE",
-            },
-        }
-    ]
-
-    router = Router(model_list=model_list)
-
-    print(router.model_list)
-    print(router.model_list[0])
-
-    assert (
-        router.model_list[0]["litellm_params"]["api_base"] == "http://0.0.0.0:9997"
-    )  # set in env
-
-    openai_client = router._get_client(
-        deployment=router.model_list[0],
-        kwargs={"input": ["hello"], "model": "xinference"},
-    )
-
-    assert openai_client._base_url == "http://0.0.0.0:9997"
-    assert "xinference" in litellm.openai_compatible_providers
-    print("passed")
-
-
-# test_xinference_embedding()
-
-
-def test_router_init_gpt_4_vision_enhancements():
-    try:
-        # tests base_url set when any base_url with /openai/deployments passed to router
-        print("Testing Azure GPT_Vision enhancements")
-
-        model_list = [
-            {
-                "model_name": "gpt-4-vision-enhancements",
-                "litellm_params": {
-                    "model": "azure/gpt-4-vision",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "base_url": "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/",
-                    "dataSources": [
-                        {
-                            "type": "AzureComputerVision",
-                            "parameters": {
-                                "endpoint": "os.environ/AZURE_VISION_ENHANCE_ENDPOINT",
-                                "key": "os.environ/AZURE_VISION_ENHANCE_KEY",
-                            },
-                        }
-                    ],
-                },
-            }
-        ]
-
-        router = Router(model_list=model_list)
-
-        print(router.model_list)
-        print(router.model_list[0])
-
-        assert (
-            router.model_list[0]["litellm_params"]["base_url"]
-            == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/"
-        )  # set in env
-
-        assert (
-            router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][
-                "endpoint"
-            ]
-            == os.environ["AZURE_VISION_ENHANCE_ENDPOINT"]
-        )
-
-        assert (
-            router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][
-                "key"
-            ]
-            == os.environ["AZURE_VISION_ENHANCE_KEY"]
-        )
-
-        azure_client = router._get_client(
-            deployment=router.model_list[0],
-            kwargs={"stream": True, "model": "gpt-4-vision-enhancements"},
-            client_type="async",
-        )
-
-        assert (
-            azure_client._base_url
-            == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/"
-        )
-        print("passed")
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
-@pytest.mark.parametrize("sync_mode", [True, False])
-@pytest.mark.asyncio
-async def test_openai_with_organization(sync_mode):
-    try:
-        print("Testing OpenAI with organization")
-        model_list = [
-            {
-                "model_name": "openai-bad-org",
-                "litellm_params": {
-                    "model": "gpt-3.5-turbo",
-                    "organization": "org-ikDc4ex8NB",
-                },
-            },
-            {
-                "model_name": "openai-good-org",
-                "litellm_params": {"model": "gpt-3.5-turbo"},
-            },
-        ]
-
-        router = Router(model_list=model_list)
-
-        print(router.model_list)
-        print(router.model_list[0])
-
-        if sync_mode:
-            openai_client = router._get_client(
-                deployment=router.model_list[0],
-                kwargs={"input": ["hello"], "model": "openai-bad-org"},
-            )
-            print(vars(openai_client))
-
-            assert openai_client.organization == "org-ikDc4ex8NB"
-
-            # bad org raises error
-
-            try:
-                response = router.completion(
-                    model="openai-bad-org",
-                    messages=[{"role": "user", "content": "this is a test"}],
-                )
-                pytest.fail(
-                    "Request should have failed - This organization does not exist"
-                )
-            except Exception as e:
-                print("Got exception: " + str(e))
-                assert "header should match organization for API key" in str(
-                    e
-                ) or "No such organization" in str(e)
-
-            # good org works
-            response = router.completion(
-                model="openai-good-org",
-                messages=[{"role": "user", "content": "this is a test"}],
-                max_tokens=5,
-            )
-        else:
-            openai_client = router._get_client(
-                deployment=router.model_list[0],
-                kwargs={"input": ["hello"], "model": "openai-bad-org"},
-                client_type="async",
-            )
-            print(vars(openai_client))
-
-            assert openai_client.organization == "org-ikDc4ex8NB"
-
-            # bad org raises error
-
-            try:
-                response = await router.acompletion(
-                    model="openai-bad-org",
-                    messages=[{"role": "user", "content": "this is a test"}],
-                )
-                pytest.fail(
-                    "Request should have failed - This organization does not exist"
-                )
-            except Exception as e:
-                print("Got exception: " + str(e))
-                assert "header should match organization for API key" in str(
-                    e
-                ) or "No such organization" in str(e)
-
-            # good org works
-            response = await router.acompletion(
-                model="openai-good-org",
-                messages=[{"role": "user", "content": "this is a test"}],
-                max_tokens=5,
-            )
-
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
-def test_init_clients_azure_command_r_plus():
-    # This tests that the router uses the OpenAI client for Azure/Command-R+
-    # For azure/command-r-plus we need to use openai.OpenAI because of how the Azure provider requires requests being sent
-    litellm.set_verbose = True
-    import logging
-
-    from litellm._logging import verbose_router_logger
-
-    verbose_router_logger.setLevel(logging.DEBUG)
-    try:
-        print("testing init 4 clients with diff timeouts")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/command-r-plus",
-                    "api_key": os.getenv("AZURE_COHERE_API_KEY"),
-                    "api_base": os.getenv("AZURE_COHERE_API_BASE"),
-                    "timeout": 0.01,
-                    "stream_timeout": 0.000_001,
-                    "max_retries": 7,
-                },
-            },
-        ]
-        router = Router(model_list=model_list, set_verbose=True)
-        for elem in router.model_list:
-            model_id = elem["model_info"]["id"]
-            async_client = router.cache.get_cache(f"{model_id}_async_client")
-            stream_async_client = router.cache.get_cache(
-                f"{model_id}_stream_async_client"
-            )
-            # Assert the Async Clients used are OpenAI clients and not Azure
-            # For using Azure/Command-R-Plus and Azure/Mistral the clients NEED to be OpenAI clients used
-            # this is weirdness introduced on Azure's side
-
-            assert "openai.AsyncOpenAI" in str(async_client)
-            assert "openai.AsyncOpenAI" in str(stream_async_client)
-        print("PASSED !")
-
-    except Exception as e:
-        traceback.print_exc()
-        pytest.fail(f"Error occurred: {e}")
-
-
-@pytest.mark.asyncio
-async def test_aaaaatext_completion_with_organization():
-    try:
-        print("Testing Text OpenAI with organization")
-        model_list = [
-            {
-                "model_name": "openai-bad-org",
-                "litellm_params": {
-                    "model": "text-completion-openai/gpt-3.5-turbo-instruct",
-                    "api_key": os.getenv("OPENAI_API_KEY", None),
-                    "organization": "org-ikDc4ex8NB",
-                },
-            },
-            {
-                "model_name": "openai-good-org",
-                "litellm_params": {
-                    "model": "text-completion-openai/gpt-3.5-turbo-instruct",
-                    "api_key": os.getenv("OPENAI_API_KEY", None),
-                    "organization": os.getenv("OPENAI_ORGANIZATION", None),
-                },
-            },
-        ]
-
-        router = Router(model_list=model_list)
-
-        print(router.model_list)
-        print(router.model_list[0])
-
-        openai_client = router._get_client(
-            deployment=router.model_list[0],
-            kwargs={"input": ["hello"], "model": "openai-bad-org"},
-        )
-        print(vars(openai_client))
-
-        assert openai_client.organization == "org-ikDc4ex8NB"
-
-        # bad org raises error
-
-        try:
-            response = await router.atext_completion(
-                model="openai-bad-org",
-                prompt="this is a test",
-            )
-            pytest.fail("Request should have failed - This organization does not exist")
-        except Exception as e:
-            print("Got exception: " + str(e))
-            assert "header should match organization for API key" in str(
-                e
-            ) or "No such organization" in str(e)
-
-        # good org works
-        response = await router.atext_completion(
-            model="openai-good-org",
-            prompt="this is a test",
-            max_tokens=5,
-        )
-        print("working response: ", response)
-
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
-def test_init_clients_async_mode():
-    litellm.set_verbose = True
-    import logging
-
-    from litellm._logging import verbose_router_logger
-    from litellm.types.router import RouterGeneralSettings
-
-    verbose_router_logger.setLevel(logging.DEBUG)
-    try:
-        print("testing init 4 clients with diff timeouts")
-        model_list = [
-            {
-                "model_name": "gpt-3.5-turbo",
-                "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
-                    "api_key": os.getenv("AZURE_API_KEY"),
-                    "api_version": os.getenv("AZURE_API_VERSION"),
-                    "api_base": os.getenv("AZURE_API_BASE"),
-                    "timeout": 0.01,
-                    "stream_timeout": 0.000_001,
-                    "max_retries": 7,
-                },
-            },
-        ]
-        router = Router(
-            model_list=model_list,
-            set_verbose=True,
-            router_general_settings=RouterGeneralSettings(async_only_mode=True),
-        )
-        for elem in router.model_list:
-            model_id = elem["model_info"]["id"]
-
-            # sync clients not initialized in async_only_mode=True
-            assert router.cache.get_cache(f"{model_id}_client") is None
-            assert router.cache.get_cache(f"{model_id}_stream_client") is None
-
-            # only async clients initialized in async_only_mode=True
-            assert router.cache.get_cache(f"{model_id}_async_client") is not None
-            assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
-@pytest.mark.parametrize(
-    "environment,expected_models",
-    [
-        ("development", ["gpt-3.5-turbo"]),
-        ("production", ["gpt-4", "gpt-3.5-turbo", "gpt-4o"]),
-    ],
-)
-def test_init_router_with_supported_environments(environment, expected_models):
-    """
-    Tests that the correct models are setup on router when LITELLM_ENVIRONMENT is set
-    """
-    os.environ["LITELLM_ENVIRONMENT"] = environment
-    model_list = [
-        {
-            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
-                "api_key": os.getenv("AZURE_API_KEY"),
-                "api_version": os.getenv("AZURE_API_VERSION"),
-                "api_base": os.getenv("AZURE_API_BASE"),
-                "timeout": 0.01,
-                "stream_timeout": 0.000_001,
-                "max_retries": 7,
-            },
-            "model_info": {"supported_environments": ["development", "production"]},
-        },
-        {
-            "model_name": "gpt-4",
-            "litellm_params": {
-                "model": "openai/gpt-4",
-                "api_key": os.getenv("OPENAI_API_KEY"),
-                "timeout": 0.01,
-                "stream_timeout": 0.000_001,
-                "max_retries": 7,
-            },
-            "model_info": {"supported_environments": ["production"]},
-        },
-        {
-            "model_name": "gpt-4o",
-            "litellm_params": {
-                "model": "openai/gpt-4o",
-                "api_key": os.getenv("OPENAI_API_KEY"),
-                "timeout": 0.01,
-                "stream_timeout": 0.000_001,
-                "max_retries": 7,
-            },
-            "model_info": {"supported_environments": ["production"]},
-        },
-    ]
-    router = Router(model_list=model_list, set_verbose=True)
-    _model_list = router.get_model_names()
-
-    print("model_list: ", _model_list)
-    print("expected_models: ", expected_models)
-
-    assert set(_model_list) == set(expected_models)
-
-    os.environ.pop("LITELLM_ENVIRONMENT")
+# # this tests if the router is initialized correctly
+# import asyncio
+# import os
+# import sys
+# import time
+# import traceback
+
+# import pytest
+
+# sys.path.insert(
+#     0, os.path.abspath("../..")
+# )  # Adds the parent directory to the system path
+# from collections import defaultdict
+# from concurrent.futures import ThreadPoolExecutor
+
+# from dotenv import load_dotenv
+
+# import litellm
+# from litellm import Router
+
+# load_dotenv()
+
+# # every time we load the router we should have 4 clients:
+# # Async
+# # Sync
+# # Async + Stream
+# # Sync + Stream
+
+
+# def test_init_clients():
+#     litellm.set_verbose = True
+#     import logging
+
+#     from litellm._logging import verbose_router_logger
+
+#     verbose_router_logger.setLevel(logging.DEBUG)
+#     try:
+#         print("testing init 4 clients with diff timeouts")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                     "timeout": 0.01,
+#                     "stream_timeout": 0.000_001,
+#                     "max_retries": 7,
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list, set_verbose=True)
+#         for elem in router.model_list:
+#             model_id = elem["model_info"]["id"]
+#             assert router.cache.get_cache(f"{model_id}_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
+
+#             # check if timeout for stream/non stream clients is set correctly
+#             async_client = router.cache.get_cache(f"{model_id}_async_client")
+#             stream_async_client = router.cache.get_cache(
+#                 f"{model_id}_stream_async_client"
+#             )
+
+#             assert async_client.timeout == 0.01
+#             assert stream_async_client.timeout == 0.000_001
+#             print(vars(async_client))
+#             print()
+#             print(async_client._base_url)
+#             assert (
+#                 async_client._base_url
+#                 == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/"
+#             )
+#             assert (
+#                 stream_async_client._base_url
+#                 == "https://openai-gpt-4-test-v-1.openai.azure.com/openai/"
+#             )
+
+#         print("PASSED !")
+
+#     except Exception as e:
+#         traceback.print_exc()
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# # test_init_clients()
+
+
+# def test_init_clients_basic():
+#     litellm.set_verbose = True
+#     try:
+#         print("Test basic client init")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list)
+#         for elem in router.model_list:
+#             model_id = elem["model_info"]["id"]
+#             assert router.cache.get_cache(f"{model_id}_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
+#         print("PASSED !")
+
+#         # see if we can init clients without timeout or max retries set
+#     except Exception as e:
+#         traceback.print_exc()
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# # test_init_clients_basic()
+
+
+# def test_init_clients_basic_azure_cloudflare():
+#     # init azure + cloudflare
+#     # init OpenAI gpt-3.5
+#     # init OpenAI text-embedding
+#     # init OpenAI comptaible - Mistral/mistral-medium
+#     # init OpenAI compatible - xinference/bge
+#     litellm.set_verbose = True
+#     try:
+#         print("Test basic client init")
+#         model_list = [
+#             {
+#                 "model_name": "azure-cloudflare",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1",
+#                 },
+#             },
+#             {
+#                 "model_name": "gpt-openai",
+#                 "litellm_params": {
+#                     "model": "gpt-3.5-turbo",
+#                     "api_key": os.getenv("OPENAI_API_KEY"),
+#                 },
+#             },
+#             {
+#                 "model_name": "text-embedding-ada-002",
+#                 "litellm_params": {
+#                     "model": "text-embedding-ada-002",
+#                     "api_key": os.getenv("OPENAI_API_KEY"),
+#                 },
+#             },
+#             {
+#                 "model_name": "mistral",
+#                 "litellm_params": {
+#                     "model": "mistral/mistral-tiny",
+#                     "api_key": os.getenv("MISTRAL_API_KEY"),
+#                 },
+#             },
+#             {
+#                 "model_name": "bge-base-en",
+#                 "litellm_params": {
+#                     "model": "xinference/bge-base-en",
+#                     "api_base": "http://127.0.0.1:9997/v1",
+#                     "api_key": os.getenv("OPENAI_API_KEY"),
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list)
+#         for elem in router.model_list:
+#             model_id = elem["model_info"]["id"]
+#             assert router.cache.get_cache(f"{model_id}_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
+#         print("PASSED !")
+
+#         # see if we can init clients without timeout or max retries set
+#     except Exception as e:
+#         traceback.print_exc()
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# # test_init_clients_basic_azure_cloudflare()
+
+
+# def test_timeouts_router():
+#     """
+#     Test the timeouts of the router with multiple clients. This HASas to raise a timeout error
+#     """
+#     import openai
+
+#     litellm.set_verbose = True
+#     try:
+#         print("testing init 4 clients with diff timeouts")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                     "timeout": 0.000001,
+#                     "stream_timeout": 0.000_001,
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list, num_retries=0)
+
+#         print("PASSED !")
+
+#         async def test():
+#             try:
+#                 await router.acompletion(
+#                     model="gpt-3.5-turbo",
+#                     messages=[
+#                         {"role": "user", "content": "hello, write a 20 pg essay"}
+#                     ],
+#                 )
+#             except Exception as e:
+#                 raise e
+
+#         asyncio.run(test())
+#     except openai.APITimeoutError as e:
+#         print(
+#             "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
+#         )
+#         print(type(e))
+#         pass
+#     except Exception as e:
+#         pytest.fail(
+#             f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
+#         )
+
+
+# # test_timeouts_router()
+
+
+# def test_stream_timeouts_router():
+#     """
+#     Test the stream timeouts router. See if it selected the correct client with stream timeout
+#     """
+#     import openai
+
+#     litellm.set_verbose = True
+#     try:
+#         print("testing init 4 clients with diff timeouts")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                     "timeout": 200,  # regular calls will not timeout, stream calls will
+#                     "stream_timeout": 10,
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list)
+
+#         print("PASSED !")
+#         data = {
+#             "model": "gpt-3.5-turbo",
+#             "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
+#             "stream": True,
+#         }
+#         selected_client = router._get_client(
+#             deployment=router.model_list[0],
+#             kwargs=data,
+#             client_type=None,
+#         )
+#         print("Select client timeout", selected_client.timeout)
+#         assert selected_client.timeout == 10
+
+#         # make actual call
+#         response = router.completion(**data)
+
+#         for chunk in response:
+#             print(f"chunk: {chunk}")
+#     except openai.APITimeoutError as e:
+#         print(
+#             "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
+#         )
+#         print(type(e))
+#         pass
+#     except Exception as e:
+#         pytest.fail(
+#             f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}"
+#         )
+
+
+# # test_stream_timeouts_router()
+
+
+# def test_xinference_embedding():
+#     # [Test Init Xinference] this tests if we init xinference on the router correctly
+#     # [Test Exception Mapping] tests that xinference is an openai comptiable provider
+#     print("Testing init xinference")
+#     print(
+#         "this tests if we create an OpenAI client for Xinference, with the correct API BASE"
+#     )
+
+#     model_list = [
+#         {
+#             "model_name": "xinference",
+#             "litellm_params": {
+#                 "model": "xinference/bge-base-en",
+#                 "api_base": "os.environ/XINFERENCE_API_BASE",
+#             },
+#         }
+#     ]
+
+#     router = Router(model_list=model_list)
+
+#     print(router.model_list)
+#     print(router.model_list[0])
+
+#     assert (
+#         router.model_list[0]["litellm_params"]["api_base"] == "http://0.0.0.0:9997"
+#     )  # set in env
+
+#     openai_client = router._get_client(
+#         deployment=router.model_list[0],
+#         kwargs={"input": ["hello"], "model": "xinference"},
+#     )
+
+#     assert openai_client._base_url == "http://0.0.0.0:9997"
+#     assert "xinference" in litellm.openai_compatible_providers
+#     print("passed")
+
+
+# # test_xinference_embedding()
+
+
+# def test_router_init_gpt_4_vision_enhancements():
+#     try:
+#         # tests base_url set when any base_url with /openai/deployments passed to router
+#         print("Testing Azure GPT_Vision enhancements")
+
+#         model_list = [
+#             {
+#                 "model_name": "gpt-4-vision-enhancements",
+#                 "litellm_params": {
+#                     "model": "azure/gpt-4-vision",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "base_url": "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/",
+#                     "dataSources": [
+#                         {
+#                             "type": "AzureComputerVision",
+#                             "parameters": {
+#                                 "endpoint": "os.environ/AZURE_VISION_ENHANCE_ENDPOINT",
+#                                 "key": "os.environ/AZURE_VISION_ENHANCE_KEY",
+#                             },
+#                         }
+#                     ],
+#                 },
+#             }
+#         ]
+
+#         router = Router(model_list=model_list)
+
+#         print(router.model_list)
+#         print(router.model_list[0])
+
+#         assert (
+#             router.model_list[0]["litellm_params"]["base_url"]
+#             == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/"
+#         )  # set in env
+
+#         assert (
+#             router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][
+#                 "endpoint"
+#             ]
+#             == os.environ["AZURE_VISION_ENHANCE_ENDPOINT"]
+#         )
+
+#         assert (
+#             router.model_list[0]["litellm_params"]["dataSources"][0]["parameters"][
+#                 "key"
+#             ]
+#             == os.environ["AZURE_VISION_ENHANCE_KEY"]
+#         )
+
+#         azure_client = router._get_client(
+#             deployment=router.model_list[0],
+#             kwargs={"stream": True, "model": "gpt-4-vision-enhancements"},
+#             client_type="async",
+#         )
+
+#         assert (
+#             azure_client._base_url
+#             == "https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions/"
+#         )
+#         print("passed")
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# @pytest.mark.parametrize("sync_mode", [True, False])
+# @pytest.mark.asyncio
+# async def test_openai_with_organization(sync_mode):
+#     try:
+#         print("Testing OpenAI with organization")
+#         model_list = [
+#             {
+#                 "model_name": "openai-bad-org",
+#                 "litellm_params": {
+#                     "model": "gpt-3.5-turbo",
+#                     "organization": "org-ikDc4ex8NB",
+#                 },
+#             },
+#             {
+#                 "model_name": "openai-good-org",
+#                 "litellm_params": {"model": "gpt-3.5-turbo"},
+#             },
+#         ]
+
+#         router = Router(model_list=model_list)
+
+#         print(router.model_list)
+#         print(router.model_list[0])
+
+#         if sync_mode:
+#             openai_client = router._get_client(
+#                 deployment=router.model_list[0],
+#                 kwargs={"input": ["hello"], "model": "openai-bad-org"},
+#             )
+#             print(vars(openai_client))
+
+#             assert openai_client.organization == "org-ikDc4ex8NB"
+
+#             # bad org raises error
+
+#             try:
+#                 response = router.completion(
+#                     model="openai-bad-org",
+#                     messages=[{"role": "user", "content": "this is a test"}],
+#                 )
+#                 pytest.fail(
+#                     "Request should have failed - This organization does not exist"
+#                 )
+#             except Exception as e:
+#                 print("Got exception: " + str(e))
+#                 assert "header should match organization for API key" in str(
+#                     e
+#                 ) or "No such organization" in str(e)
+
+#             # good org works
+#             response = router.completion(
+#                 model="openai-good-org",
+#                 messages=[{"role": "user", "content": "this is a test"}],
+#                 max_tokens=5,
+#             )
+#         else:
+#             openai_client = router._get_client(
+#                 deployment=router.model_list[0],
+#                 kwargs={"input": ["hello"], "model": "openai-bad-org"},
+#                 client_type="async",
+#             )
+#             print(vars(openai_client))
+
+#             assert openai_client.organization == "org-ikDc4ex8NB"
+
+#             # bad org raises error
+
+#             try:
+#                 response = await router.acompletion(
+#                     model="openai-bad-org",
+#                     messages=[{"role": "user", "content": "this is a test"}],
+#                 )
+#                 pytest.fail(
+#                     "Request should have failed - This organization does not exist"
+#                 )
+#             except Exception as e:
+#                 print("Got exception: " + str(e))
+#                 assert "header should match organization for API key" in str(
+#                     e
+#                 ) or "No such organization" in str(e)
+
+#             # good org works
+#             response = await router.acompletion(
+#                 model="openai-good-org",
+#                 messages=[{"role": "user", "content": "this is a test"}],
+#                 max_tokens=5,
+#             )
+
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# def test_init_clients_azure_command_r_plus():
+#     # This tests that the router uses the OpenAI client for Azure/Command-R+
+#     # For azure/command-r-plus we need to use openai.OpenAI because of how the Azure provider requires requests being sent
+#     litellm.set_verbose = True
+#     import logging
+
+#     from litellm._logging import verbose_router_logger
+
+#     verbose_router_logger.setLevel(logging.DEBUG)
+#     try:
+#         print("testing init 4 clients with diff timeouts")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/command-r-plus",
+#                     "api_key": os.getenv("AZURE_COHERE_API_KEY"),
+#                     "api_base": os.getenv("AZURE_COHERE_API_BASE"),
+#                     "timeout": 0.01,
+#                     "stream_timeout": 0.000_001,
+#                     "max_retries": 7,
+#                 },
+#             },
+#         ]
+#         router = Router(model_list=model_list, set_verbose=True)
+#         for elem in router.model_list:
+#             model_id = elem["model_info"]["id"]
+#             async_client = router.cache.get_cache(f"{model_id}_async_client")
+#             stream_async_client = router.cache.get_cache(
+#                 f"{model_id}_stream_async_client"
+#             )
+#             # Assert the Async Clients used are OpenAI clients and not Azure
+#             # For using Azure/Command-R-Plus and Azure/Mistral the clients NEED to be OpenAI clients used
+#             # this is weirdness introduced on Azure's side
+
+#             assert "openai.AsyncOpenAI" in str(async_client)
+#             assert "openai.AsyncOpenAI" in str(stream_async_client)
+#         print("PASSED !")
+
+#     except Exception as e:
+#         traceback.print_exc()
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# @pytest.mark.asyncio
+# async def test_aaaaatext_completion_with_organization():
+#     try:
+#         print("Testing Text OpenAI with organization")
+#         model_list = [
+#             {
+#                 "model_name": "openai-bad-org",
+#                 "litellm_params": {
+#                     "model": "text-completion-openai/gpt-3.5-turbo-instruct",
+#                     "api_key": os.getenv("OPENAI_API_KEY", None),
+#                     "organization": "org-ikDc4ex8NB",
+#                 },
+#             },
+#             {
+#                 "model_name": "openai-good-org",
+#                 "litellm_params": {
+#                     "model": "text-completion-openai/gpt-3.5-turbo-instruct",
+#                     "api_key": os.getenv("OPENAI_API_KEY", None),
+#                     "organization": os.getenv("OPENAI_ORGANIZATION", None),
+#                 },
+#             },
+#         ]
+
+#         router = Router(model_list=model_list)
+
+#         print(router.model_list)
+#         print(router.model_list[0])
+
+#         openai_client = router._get_client(
+#             deployment=router.model_list[0],
+#             kwargs={"input": ["hello"], "model": "openai-bad-org"},
+#         )
+#         print(vars(openai_client))
+
+#         assert openai_client.organization == "org-ikDc4ex8NB"
+
+#         # bad org raises error
+
+#         try:
+#             response = await router.atext_completion(
+#                 model="openai-bad-org",
+#                 prompt="this is a test",
+#             )
+#             pytest.fail("Request should have failed - This organization does not exist")
+#         except Exception as e:
+#             print("Got exception: " + str(e))
+#             assert "header should match organization for API key" in str(
+#                 e
+#             ) or "No such organization" in str(e)
+
+#         # good org works
+#         response = await router.atext_completion(
+#             model="openai-good-org",
+#             prompt="this is a test",
+#             max_tokens=5,
+#         )
+#         print("working response: ", response)
+
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# def test_init_clients_async_mode():
+#     litellm.set_verbose = True
+#     import logging
+
+#     from litellm._logging import verbose_router_logger
+#     from litellm.types.router import RouterGeneralSettings
+
+#     verbose_router_logger.setLevel(logging.DEBUG)
+#     try:
+#         print("testing init 4 clients with diff timeouts")
+#         model_list = [
+#             {
+#                 "model_name": "gpt-3.5-turbo",
+#                 "litellm_params": {
+#                     "model": "azure/chatgpt-v-2",
+#                     "api_key": os.getenv("AZURE_API_KEY"),
+#                     "api_version": os.getenv("AZURE_API_VERSION"),
+#                     "api_base": os.getenv("AZURE_API_BASE"),
+#                     "timeout": 0.01,
+#                     "stream_timeout": 0.000_001,
+#                     "max_retries": 7,
+#                 },
+#             },
+#         ]
+#         router = Router(
+#             model_list=model_list,
+#             set_verbose=True,
+#             router_general_settings=RouterGeneralSettings(async_only_mode=True),
+#         )
+#         for elem in router.model_list:
+#             model_id = elem["model_info"]["id"]
+
+#             # sync clients not initialized in async_only_mode=True
+#             assert router.cache.get_cache(f"{model_id}_client") is None
+#             assert router.cache.get_cache(f"{model_id}_stream_client") is None
+
+#             # only async clients initialized in async_only_mode=True
+#             assert router.cache.get_cache(f"{model_id}_async_client") is not None
+#             assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+
+# @pytest.mark.parametrize(
+#     "environment,expected_models",
+#     [
+#         ("development", ["gpt-3.5-turbo"]),
+#         ("production", ["gpt-4", "gpt-3.5-turbo", "gpt-4o"]),
+#     ],
+# )
+# def test_init_router_with_supported_environments(environment, expected_models):
+#     """
+#     Tests that the correct models are setup on router when LITELLM_ENVIRONMENT is set
+#     """
+#     os.environ["LITELLM_ENVIRONMENT"] = environment
+#     model_list = [
+#         {
+#             "model_name": "gpt-3.5-turbo",
+#             "litellm_params": {
+#                 "model": "azure/chatgpt-v-2",
+#                 "api_key": os.getenv("AZURE_API_KEY"),
+#                 "api_version": os.getenv("AZURE_API_VERSION"),
+#                 "api_base": os.getenv("AZURE_API_BASE"),
+#                 "timeout": 0.01,
+#                 "stream_timeout": 0.000_001,
+#                 "max_retries": 7,
+#             },
+#             "model_info": {"supported_environments": ["development", "production"]},
+#         },
+#         {
+#             "model_name": "gpt-4",
+#             "litellm_params": {
+#                 "model": "openai/gpt-4",
+#                 "api_key": os.getenv("OPENAI_API_KEY"),
+#                 "timeout": 0.01,
+#                 "stream_timeout": 0.000_001,
+#                 "max_retries": 7,
+#             },
+#             "model_info": {"supported_environments": ["production"]},
+#         },
+#         {
+#             "model_name": "gpt-4o",
+#             "litellm_params": {
+#                 "model": "openai/gpt-4o",
+#                 "api_key": os.getenv("OPENAI_API_KEY"),
+#                 "timeout": 0.01,
+#                 "stream_timeout": 0.000_001,
+#                 "max_retries": 7,
+#             },
+#             "model_info": {"supported_environments": ["production"]},
+#         },
+#     ]
+#     router = Router(model_list=model_list, set_verbose=True)
+#     _model_list = router.get_model_names()
+
+#     print("model_list: ", _model_list)
+#     print("expected_models: ", expected_models)
+
+#     assert set(_model_list) == set(expected_models)
+
+#     os.environ.pop("LITELLM_ENVIRONMENT")