Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)
test: update tests to new deployment model (#10142)
* test: update tests to new deployment model
* test: update model name
* test: skip cohere rbac issue test
* test: update test - replace gpt-4o model
parent 415abfc222
commit 1ea046cc61
72 changed files with 294 additions and 292 deletions
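The change set is largely mechanical: the Azure test deployment azure/chatgpt-v-2 is renamed to azure/chatgpt-v-3, azure/gpt-4o becomes azure/gpt-4o-new-test, and the Azure AI Cohere rerank tests are commented out or skipped due to RBAC issues. A minimal sketch of the call pattern the updated tests exercise, using only parameters that appear in this diff (the deployment names are the new ones introduced here; an Azure resource would need these deployments to actually exist):

    import os
    import litellm

    # Deployment renames in this commit:
    #   azure/chatgpt-v-2 -> azure/chatgpt-v-3
    #   azure/gpt-4o      -> azure/gpt-4o-new-test
    response = litellm.completion(
        model="azure/chatgpt-v-3",
        messages=[{"role": "user", "content": "This is a test"}],
        api_key=os.getenv("AZURE_API_KEY"),
        api_base=os.getenv("AZURE_API_BASE"),
        api_version=os.getenv("AZURE_API_VERSION"),
        max_tokens=5,
    )
    print(response.choices[0].message.content)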
@@ -14,7 +14,7 @@ from litellm.llms.anthropic.chat import ModelResponseIterator
 import httpx
 import json
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
-from base_rerank_unit_tests import BaseLLMRerankTest
+# from base_rerank_unit_tests import BaseLLMRerankTest

 load_dotenv()
 import io

@@ -255,16 +255,17 @@ def test_azure_deepseek_reasoning_content():
     assert response.choices[0].message.content == "\n\nThe sky is a canvas of blue"


-class TestAzureAIRerank(BaseLLMRerankTest):
-    def get_custom_llm_provider(self) -> litellm.LlmProviders:
-        return litellm.LlmProviders.AZURE_AI
+# skipping due to cohere rbac issues
+# class TestAzureAIRerank(BaseLLMRerankTest):
+#     def get_custom_llm_provider(self) -> litellm.LlmProviders:
+#         return litellm.LlmProviders.AZURE_AI

-    def get_base_rerank_call_args(self) -> dict:
-        return {
-            "model": "azure_ai/cohere-rerank-v3-english",
-            "api_base": os.getenv("AZURE_AI_COHERE_API_BASE"),
-            "api_key": os.getenv("AZURE_AI_COHERE_API_KEY"),
-        }
+#     def get_base_rerank_call_args(self) -> dict:
+#         return {
+#             "model": "azure_ai/cohere-rerank-v3-english",
+#             "api_base": os.getenv("AZURE_AI_COHERE_API_BASE"),
+#             "api_key": os.getenv("AZURE_AI_COHERE_API_KEY"),
+#         }


 @pytest.mark.asyncio

@@ -279,7 +280,7 @@ async def test_azure_ai_request_format():

     # Set up the test parameters
     api_key = os.getenv("AZURE_API_KEY")
-    api_base = f"{os.getenv('AZURE_API_BASE')}/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview"
+    api_base = f"{os.getenv('AZURE_API_BASE')}/openai/deployments/gpt-4o-new-test/chat/completions?api-version=2024-08-01-preview"
     model = "azure_ai/gpt-4o"
     messages = [
         {"role": "user", "content": "hi"},
@@ -137,7 +137,7 @@ def test_azure_extra_headers(input, call_type, header_value):
         func = image_generation

     data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com",
         "api_version": "2023-07-01-preview",
         "api_key": "my-azure-api-key",

@@ -339,7 +339,7 @@ def test_azure_gpt_4o_with_tool_call_and_response_format(api_version):

    with patch.object(client.chat.completions.with_raw_response, "create") as mock_post:
        response = litellm.completion(
-            model="azure/gpt-4o",
+            model="azure/gpt-4o-new-test",
            messages=[
                {
                    "role": "system",

@@ -474,7 +474,7 @@ def test_azure_max_retries_0(

    try:
        completion(
-            model="azure/gpt-4o",
+            model="azure/gpt-4o-new-test",
            messages=[{"role": "user", "content": "Hello world"}],
            max_retries=max_retries,
            stream=stream,

@@ -502,7 +502,7 @@ async def test_async_azure_max_retries_0(

    try:
        await acompletion(
-            model="azure/gpt-4o",
+            model="azure/gpt-4o-new-test",
            messages=[{"role": "user", "content": "Hello world"}],
            max_retries=max_retries,
            stream=stream,
@@ -217,7 +217,7 @@ def test_openai_optional_params_embeddings():
 def test_azure_optional_params_embeddings():
     litellm.drop_params = True
     optional_params = get_optional_params_embeddings(
-        model="chatgpt-v-2",
+        model="chatgpt-v-3",
         user="John",
         encoding_format=None,
         custom_llm_provider="azure",

@@ -396,7 +396,7 @@ def test_azure_tool_choice(api_version):
     """
     litellm.drop_params = True
     optional_params = litellm.utils.get_optional_params(
-        model="chatgpt-v-2",
+        model="chatgpt-v-3",
         user="John",
         custom_llm_provider="azure",
         max_tokens=10,
@@ -150,6 +150,7 @@ async def test_basic_rerank_together_ai(sync_mode):

 @pytest.mark.asyncio()
 @pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.skip(reason="Skipping test due to Cohere RBAC issues")
 async def test_basic_rerank_azure_ai(sync_mode):
     import os

@@ -91,7 +91,7 @@ async def make_async_calls(metadata=None, **completion_kwargs):
 def create_async_task(**completion_kwargs):
     litellm.set_verbose = True
     completion_args = {
-        "model": "openai/chatgpt-v-2",
+        "model": "openai/chatgpt-v-3",
         "api_version": "2024-02-01",
         "messages": [{"role": "user", "content": "This is a test"}],
         "max_tokens": 5,
@@ -86,7 +86,7 @@ def create_async_task(**completion_kwargs):
     By default a standard set of arguments are used for the litellm.acompletion function.
     """
     completion_args = {
-        "model": "openai/chatgpt-v-2",
+        "model": "openai/chatgpt-v-3",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test" * 100}],
        "max_tokens": 5,
@@ -1,7 +1,7 @@
 model_list:
   - model_name: gpt-4-team1
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
       api_version: "2023-05-15"
       api_key: os.environ/AZURE_API_KEY
@@ -26,7 +26,7 @@ model_list = [
    { # list of model deployments
        "model_name": "gpt-3.5-turbo", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
            "api_key": "bad-key",
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),

@@ -143,7 +143,7 @@ async def test_cooldown_same_model_name(sync_mode):
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -153,7 +153,7 @@ async def test_cooldown_same_model_name(sync_mode):
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -184,7 +184,7 @@ async def test_cooldown_same_model_name(sync_mode):
            model_ids.append(model["model_info"]["id"])
        print("\n litellm model ids ", model_ids)

-        # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+        # example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960']
        assert (
            model_ids[0] != model_ids[1]
        ) # ensure both models have a uuid added, and they have different names

@@ -201,7 +201,7 @@ async def test_cooldown_same_model_name(sync_mode):
            model_ids.append(model["model_info"]["id"])
        print("\n litellm model ids ", model_ids)

-        # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+        # example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960']
        assert (
            model_ids[0] != model_ids[1]
        ) # ensure both models have a uuid added, and they have different names
@@ -194,7 +194,7 @@ def create_async_task(**completion_kwargs):
     By default a standard set of arguments are used for the litellm.acompletion function.
     """
     completion_args = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test"}],
        "max_tokens": 5,
@@ -71,7 +71,7 @@ async def test_create_delete_assistants(provider, sync_mode):
     model = "gpt-4-turbo"
     if provider == "azure":
         os.environ["AZURE_API_VERSION"] = "2024-05-01-preview"
-        model = "chatgpt-v-2"
+        model = "chatgpt-v-3"

     if sync_mode == True:
         assistant = litellm.create_assistants(
@@ -46,7 +46,7 @@ async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_base": os.getenv("AZURE_API_BASE"),
                "tenant_id": os.getenv("AZURE_TENANT_ID"),
                "client_id": os.getenv("AZURE_CLIENT_ID"),

@@ -95,6 +95,6 @@ async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):

    assert json_body == {
        "messages": [{"role": "user", "content": "Hello world!"}],
-        "model": "chatgpt-v-2",
+        "model": "chatgpt-v-3",
        "stream": False,
    }
@@ -18,7 +18,7 @@
#         {
#             "model_name": "azure-test",
#             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
#                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -33,7 +33,7 @@
#     try:
#         start_time = time.time()
#         response = await client.chat.completions.create(
-#             model="chatgpt-v-2",
+#             model="chatgpt-v-3",
#             messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
#             stream=True,
#         )
@@ -324,7 +324,7 @@ def test_caching_with_models_v2():
    litellm.set_verbose = True
    response1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
    response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
-    response3 = completion(model="azure/chatgpt-v-2", messages=messages, caching=True)
+    response3 = completion(model="azure/chatgpt-v-3", messages=messages, caching=True)
    print(f"response1: {response1}")
    print(f"response2: {response2}")
    print(f"response3: {response3}")

@@ -1170,7 +1170,7 @@ async def test_s3_cache_stream_azure(sync_mode):

        if sync_mode:
            response1 = litellm.completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=messages,
                max_tokens=40,
                temperature=1,

@@ -1183,7 +1183,7 @@ async def test_s3_cache_stream_azure(sync_mode):
            print(response_1_content)
        else:
            response1 = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=messages,
                max_tokens=40,
                temperature=1,

@@ -1203,7 +1203,7 @@ async def test_s3_cache_stream_azure(sync_mode):

        if sync_mode:
            response2 = litellm.completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=messages,
                max_tokens=40,
                temperature=1,

@@ -1216,7 +1216,7 @@ async def test_s3_cache_stream_azure(sync_mode):
            print(response_2_content)
        else:
            response2 = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=messages,
                max_tokens=40,
                temperature=1,

@@ -1279,7 +1279,7 @@ async def test_s3_cache_acompletion_azure():
        print("s3 Cache: test for caching, streaming + completion")

        response1 = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            max_tokens=40,
            temperature=1,

@@ -1289,7 +1289,7 @@ async def test_s3_cache_acompletion_azure():
        time.sleep(2)

        response2 = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            max_tokens=40,
            temperature=1,
@@ -58,7 +58,7 @@ def test_caching_router():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -55,7 +55,7 @@
# #         {
# #             "model_name": "gpt-3.5-turbo", # openai model name
# #             "litellm_params": { # params for litellm completion/embedding call
-# #                 "model": "azure/chatgpt-v-2",
+# #                 "model": "azure/chatgpt-v-3",
# #                 "api_key": os.getenv("AZURE_API_KEY"),
# #                 "api_version": os.getenv("AZURE_API_VERSION"),
# #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -93,7 +93,7 @@
# #         {
# #             "model_name": "gpt-3.5-turbo", # openai model name
# #             "litellm_params": { # params for litellm completion/embedding call
-# #                 "model": "azure/chatgpt-v-2",
+# #                 "model": "azure/chatgpt-v-3",
# #                 "api_key": os.getenv("AZURE_API_KEY"),
# #                 "api_version": os.getenv("AZURE_API_VERSION"),
# #                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -732,7 +732,7 @@ def encode_image(image_path):
    "model",
    [
        "gpt-4o",
-        "azure/gpt-4o",
+        "azure/gpt-4o-new-test",
        "anthropic/claude-3-opus-20240229",
    ],
) #

@@ -1824,9 +1824,9 @@ def test_completion_openai():
    "model, api_version",
    [
        # ("gpt-4o-2024-08-06", None),
-        # ("azure/chatgpt-v-2", None),
+        # ("azure/chatgpt-v-3", None),
        ("bedrock/anthropic.claude-3-sonnet-20240229-v1:0", None),
-        # ("azure/gpt-4o", "2024-08-01-preview"),
+        # ("azure/gpt-4o-new-test", "2024-08-01-preview"),
    ],
)
@pytest.mark.flaky(retries=3, delay=1)
@@ -2495,7 +2495,7 @@ def test_completion_azure_extra_headers():
    litellm.client_session = http_client
    try:
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            api_base=os.getenv("AZURE_API_BASE"),
            api_version="2023-07-01-preview",

@@ -2544,7 +2544,7 @@ def test_completion_azure_ad_token():
    litellm.client_session = http_client
    try:
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            azure_ad_token="my-special-token",
        )

@@ -2575,7 +2575,7 @@ def test_completion_azure_key_completion_arg():
        litellm.set_verbose = True
        ## Test azure call
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            api_key=old_key,
            logprobs=True,

@@ -2633,7 +2633,7 @@ async def test_re_use_azure_async_client():
        ## Test azure call
        for _ in range(3):
            response = await litellm.acompletion(
-                model="azure/chatgpt-v-2", messages=messages, client=client
+                model="azure/chatgpt-v-3", messages=messages, client=client
            )
            print(f"response: {response}")
    except Exception as e:
@@ -2665,7 +2665,7 @@ def test_completion_azure():
        litellm.set_verbose = False
        ## Test azure call
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            api_key="os.environ/AZURE_API_KEY",
        )

@@ -2673,7 +2673,7 @@ def test_completion_azure():
        print(f"response hidden params: {response._hidden_params}")
        ## Test azure flag for backwards-compat
        # response = completion(
-        #     model="chatgpt-v-2",
+        #     model="chatgpt-v-3",
        #     messages=messages,
        #     azure=True,
        #     max_tokens=10

@@ -2712,7 +2712,7 @@ def test_azure_openai_ad_token():
    litellm.input_callback = [tester]
    try:
        response = litellm.completion(
-            model="azure/chatgpt-v-2", # e.g. gpt-35-instant
+            model="azure/chatgpt-v-3", # e.g. gpt-35-instant
            messages=[
                {
                    "role": "user",

@@ -2750,7 +2750,7 @@ def test_completion_azure2():

        ## Test azure call
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            api_base=api_base,
            api_key=api_key,

@@ -2787,7 +2787,7 @@ def test_completion_azure3():

        ## Test azure call
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            max_tokens=10,
        )

@@ -2835,7 +2835,7 @@ def test_completion_azure_with_litellm_key():
        openai.api_key = "ymca"

        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
        )
        # Add any assertions here to check the response
@@ -2863,7 +2863,7 @@ def test_completion_azure_deployment_id():
    try:
        litellm.set_verbose = True
        response = completion(
-            deployment_id="chatgpt-v-2",
+            deployment_id="chatgpt-v-3",
            model="gpt-3.5-turbo",
            messages=messages,
        )

@@ -3925,7 +3925,7 @@ def test_completion_stream_watsonx():
@pytest.mark.parametrize(
    "provider, model, project, region_name, token",
    [
-        ("azure", "chatgpt-v-2", None, None, "test-token"),
+        ("azure", "chatgpt-v-3", None, None, "test-token"),
        ("vertex_ai", "anthropic-claude-3", "adroit-crow-1", "us-east1", None),
        ("watsonx", "ibm/granite", "96946574", "dallas", "1234"),
        ("bedrock", "anthropic.claude-3", None, "us-east-1", None),

@@ -4178,7 +4178,7 @@ async def test_completion_ai21_chat():

@pytest.mark.parametrize(
    "model",
-    ["gpt-4o", "azure/chatgpt-v-2"],
+    ["gpt-4o", "azure/chatgpt-v-3"],
)
@pytest.mark.parametrize(
    "stream",

@@ -4200,7 +4200,7 @@ def test_completion_response_ratelimit_headers(model, stream):
    assert "x-ratelimit-remaining-requests" in additional_headers
    assert "x-ratelimit-remaining-tokens" in additional_headers

-    if model == "azure/chatgpt-v-2":
+    if model == "azure/chatgpt-v-3":
        # Azure OpenAI header
        assert "llm_provider-azureml-model-session" in additional_headers
    if model == "claude-3-sonnet-20240229":
@@ -46,7 +46,7 @@ async def test_delete_deployment():
    import base64

    litellm_params = LiteLLM_Params(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        api_key=os.getenv("AZURE_API_KEY"),
        api_base=os.getenv("AZURE_API_BASE"),
        api_version=os.getenv("AZURE_API_VERSION"),

@@ -232,7 +232,7 @@ async def test_db_error_new_model_check():


    litellm_params = LiteLLM_Params(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        api_key=os.getenv("AZURE_API_KEY"),
        api_base=os.getenv("AZURE_API_BASE"),
        api_version=os.getenv("AZURE_API_VERSION"),

@@ -250,7 +250,7 @@ def _create_model_list(flag_value: Literal[0, 1], master_key: str):
    import base64

    new_litellm_params = LiteLLM_Params(
-        model="azure/chatgpt-v-2-3",
+        model="azure/chatgpt-v-3-3",
        api_key=os.getenv("AZURE_API_KEY"),
        api_base=os.getenv("AZURE_API_BASE"),
        api_version=os.getenv("AZURE_API_VERSION"),
@@ -5,12 +5,12 @@ model_list:
      model: gpt-3.5-turbo
  - model_name: working-azure-gpt-3.5-turbo
    litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
  - model_name: azure-gpt-3.5-turbo
    litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
      api_base: os.environ/AZURE_API_BASE
      api_key: bad-key
  - model_name: azure-embedding
@@ -1,7 +1,7 @@
model_list:
  - model_name: azure-cloudflare
    litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
      api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
      api_key: os.environ/AZURE_API_KEY
      api_version: 2023-07-01-preview

@@ -12,7 +12,7 @@ model_list:
  - litellm_params:
      api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
      api_key: os.environ/AZURE_API_KEY
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
    model_name: azure-cloudflare-model
  - litellm_params:
      api_base: https://openai-france-1234.openai.azure.com
@@ -1,7 +1,7 @@
model_list:
  - model_name: Azure OpenAI GPT-4 Canada
    litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
@@ -450,12 +450,12 @@ def test_chat_azure_stream():
        customHandler = CompletionCustomHandler()
        litellm.callbacks = [customHandler]
        response = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
        )
        # test streaming
        response = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
            stream=True,
        )

@@ -464,7 +464,7 @@ def test_chat_azure_stream():
        # test failure callback
        try:
            response = litellm.completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
                api_key="my-bad-key",
                stream=True,

@@ -491,12 +491,12 @@ async def test_async_chat_azure_stream():
        customHandler = CompletionCustomHandler()
        litellm.callbacks = [customHandler]
        response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
        )
        ## test streaming
        response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
            stream=True,
        )

@@ -507,7 +507,7 @@ async def test_async_chat_azure_stream():
        # test failure callback
        try:
            response = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
                api_key="my-bad-key",
                stream=True,
@@ -1018,7 +1018,7 @@ async def test_async_completion_azure_caching():
    litellm.callbacks = [customHandler_caching]
    unique_time = time.time()
    response1 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        messages=[
            {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
        ],

@@ -1027,7 +1027,7 @@ async def test_async_completion_azure_caching():
    await asyncio.sleep(1)
    print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
    response2 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        messages=[
            {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
        ],

@@ -1056,7 +1056,7 @@ async def test_async_completion_azure_caching_streaming():
    litellm.callbacks = [customHandler_caching]
    unique_time = uuid.uuid4()
    response1 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        messages=[
            {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
        ],

@@ -1069,7 +1069,7 @@ async def test_async_completion_azure_caching_streaming():
    initial_customhandler_caching_states = len(customHandler_caching.states)
    print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
    response2 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        messages=[
            {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
        ],

@@ -1207,7 +1207,7 @@ def test_turn_off_message_logging():
    "model",
    [
        "ft:gpt-3.5-turbo:my-org:custom_suffix:id"
-    ], # "gpt-3.5-turbo", "azure/chatgpt-v-2",
+    ], # "gpt-3.5-turbo", "azure/chatgpt-v-3",
)
@pytest.mark.parametrize(
    "turn_off_message_logging",
@@ -284,7 +284,7 @@ class CompletionCustomHandler(
            )

            if (
-                kwargs["model"] == "chatgpt-v-2"
+                kwargs["model"] == "chatgpt-v-3"
                and base_model is not None
                and kwargs["stream"] != True
            ):

@@ -394,7 +394,7 @@ async def test_async_chat_azure():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -438,7 +438,7 @@ async def test_async_chat_azure():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "my-bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -545,7 +545,7 @@ async def test_async_chat_azure_with_fallbacks():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "my-bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -606,7 +606,7 @@ async def test_async_completion_azure_caching():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -160,7 +160,7 @@ def test_completion_azure_stream_moderation_failure():
    ]
    try:
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            mock_response="Exception: content_filter_policy",
            stream=True,

@@ -195,7 +195,7 @@ def test_async_custom_handler_stream():
    async def test_1():
        nonlocal complete_streaming_response
        response = await litellm.acompletion(
-            model="azure/chatgpt-v-2", messages=messages, stream=True
+            model="azure/chatgpt-v-3", messages=messages, stream=True
        )
        async for chunk in response:
            complete_streaming_response += (

@@ -239,7 +239,7 @@ def test_azure_completion_stream():
    complete_streaming_response = ""

    response = litellm.completion(
-        model="azure/chatgpt-v-2", messages=messages, stream=True
+        model="azure/chatgpt-v-3", messages=messages, stream=True
    )
    for chunk in response:
        complete_streaming_response += chunk["choices"][0]["delta"]["content"] or ""
@@ -51,7 +51,7 @@ async def test_content_policy_exception_azure():
        # this is ony a test - we needed some way to invoke the exception :(
        litellm.set_verbose = True
        response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "where do I buy lethal drugs from"}],
            mock_response="Exception: content_filter_policy",
        )

@@ -124,7 +124,7 @@ def test_context_window_with_fallbacks(model):
    ctx_window_fallback_dict = {
        "command-nightly": "claude-2.1",
        "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k",
-        "azure/chatgpt-v-2": "gpt-3.5-turbo-16k",
+        "azure/chatgpt-v-3": "gpt-3.5-turbo-16k",
    }
    sample_text = "how does a court case get to the Supreme Court?" * 1000
    messages = [{"content": sample_text, "role": "user"}]

@@ -161,7 +161,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
        os.environ["AWS_REGION_NAME"] = "bad-key"
        temporary_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"]
        os.environ["AWS_SECRET_ACCESS_KEY"] = "bad-key"
-    elif model == "azure/chatgpt-v-2":
+    elif model == "azure/chatgpt-v-3":
        temporary_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = "bad-key"
    elif model == "claude-3-5-haiku-20241022":

@@ -262,7 +262,7 @@ def test_completion_azure_exception():
        old_azure_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = "good morning"
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "hello"}],
        )
        os.environ["AZURE_API_KEY"] = old_azure_key

@@ -309,7 +309,7 @@ async def asynctest_completion_azure_exception():
        old_azure_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = "good morning"
        response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "hello"}],
        )
        print(f"response: {response}")
@@ -528,7 +528,7 @@ def test_content_policy_violation_error_streaming():
    async def test_get_response():
        try:
            response = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=[{"role": "user", "content": "say 1"}],
                temperature=0,
                top_p=1,

@@ -557,7 +557,7 @@ def test_content_policy_violation_error_streaming():
    async def test_get_error():
        try:
            response = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=[
                    {"role": "user", "content": "where do i buy lethal drugs from"}
                ],

@@ -754,7 +754,7 @@ def test_litellm_predibase_exception():
#         return False
# # Repeat each model 500 times
# # extended_models = [model for model in models for _ in range(250)]
-# extended_models = ["azure/chatgpt-v-2" for _ in range(250)]
+# extended_models = ["azure/chatgpt-v-3" for _ in range(250)]

# def worker(model):
#     return test_model_call(model)

@@ -934,7 +934,7 @@ def _pre_call_utils_httpx(
        ("openai", "gpt-3.5-turbo", "chat_completion", False),
        ("openai", "gpt-3.5-turbo", "chat_completion", True),
        ("openai", "gpt-3.5-turbo-instruct", "completion", True),
-        ("azure", "azure/chatgpt-v-2", "chat_completion", True),
+        ("azure", "azure/chatgpt-v-3", "chat_completion", True),
        ("azure", "azure/text-embedding-ada-002", "embedding", True),
        ("azure", "azure_text/gpt-3.5-turbo-instruct", "completion", True),
    ],

@@ -1158,7 +1158,7 @@ async def test_exception_with_headers_httpx(


@pytest.mark.asyncio
-@pytest.mark.parametrize("model", ["azure/chatgpt-v-2", "openai/gpt-3.5-turbo"])
+@pytest.mark.parametrize("model", ["azure/chatgpt-v-3", "openai/gpt-3.5-turbo"])
async def test_bad_request_error_contains_httpx_response(model):
    """
    Test that the BadRequestError contains the httpx response

@@ -1209,7 +1209,7 @@ def test_context_window_exceeded_error_from_litellm_proxy():

@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize("stream_mode", [True, False])
-@pytest.mark.parametrize("model", ["azure/gpt-4o"]) # "gpt-4o-mini",
+@pytest.mark.parametrize("model", ["azure/gpt-4o-new-test"]) # "gpt-4o-mini",
@pytest.mark.asyncio
async def test_exception_bubbling_up(sync_mode, stream_mode, model):
    """
@@ -108,14 +108,14 @@ async def test_aaabasic_gcs_logger():
                },
                "endpoint": "http://localhost:4000/chat/completions",
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
                "model_info": {
                    "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                    "db_model": False,
                },
                "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                "caching_groups": None,
-                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
            },
        )


@@ -216,14 +216,14 @@ async def test_basic_gcs_logger_failure():
                },
                "endpoint": "http://localhost:4000/chat/completions",
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
                "model_info": {
                    "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                    "db_model": False,
                },
                "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                "caching_groups": None,
-                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
            },
        )
    except Exception:

@@ -626,14 +626,14 @@ async def test_basic_gcs_logger_with_folder_in_bucket_name():
                },
                "endpoint": "http://localhost:4000/chat/completions",
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
                "model_info": {
                    "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                    "db_model": False,
                },
                "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                "caching_groups": None,
-                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
            },
        )

@@ -20,7 +20,7 @@ import litellm
async def test_azure_health_check():
    response = await litellm.ahealth_check(
        model_params={
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
            "messages": [{"role": "user", "content": "Hey, how's it going?"}],
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_base": os.getenv("AZURE_API_BASE"),
@@ -78,7 +78,7 @@ async def make_async_calls(metadata=None, **completion_kwargs):

def create_async_task(**completion_kwargs):
    completion_args = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test"}],
        "max_tokens": 5,
@@ -33,7 +33,7 @@ def test_model_added():
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": "1234"},
        }

@@ -47,7 +47,7 @@ def test_get_available_deployments():
    test_cache = DualCache()
    least_busy_logger = LeastBusyLoggingHandler(router_cache=test_cache, model_list=[])
    model_group = "gpt-3.5-turbo"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
    kwargs = {
        "litellm_params": {
            "metadata": {

@@ -113,7 +113,7 @@ async def test_router_get_available_deployments(async_test):
    router.leastbusy_logger.test_flag = True

    model_group = "azure-model"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
    request_count_dict = {1: 10, 2: 54, 3: 100}
    cache_key = f"{model_group}_request_count"
    if async_test is True:
@@ -46,7 +46,7 @@
#         {
#             "model_name": "gpt-3.5-turbo",
#             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
#                 "api_version": os.getenv("AZURE_API_VERSION"),
@@ -38,7 +38,7 @@
#         {
#             "model_name": "gpt-3.5-turbo",
#             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
#                 "api_version": os.getenv("AZURE_API_VERSION"),
@@ -60,7 +60,7 @@ async def test_get_available_deployments_custom_price():
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "input_cost_per_token": 0.00003,
                "output_cost_per_token": 0.00003,
            },
@@ -48,7 +48,7 @@ async def test_latency_memory_leak(sync_mode):
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -130,7 +130,7 @@ def test_latency_updated():
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -173,7 +173,7 @@ def test_latency_updated_custom_ttl():
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -200,12 +200,12 @@ def test_get_available_deployments():
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "1234"},
        },
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "5678"},
        },
    ]
@@ -219,7 +219,7 @@ def test_get_available_deployments():
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -240,7 +240,7 @@ def test_get_available_deployments():
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -275,7 +275,7 @@ async def _deploy(lowest_latency_logger, deployment_id, tokens_used, duration):
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -317,12 +317,12 @@ def test_get_available_endpoints_tpm_rpm_check_async(ans_rpm):
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "1234", "rpm": ans_rpm},
        },
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "5678", "rpm": non_ans_rpm},
        },
    ]

@@ -366,12 +366,12 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "1234", "rpm": ans_rpm},
        },
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "5678", "rpm": non_ans_rpm},
        },
    ]

@@ -385,7 +385,7 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -407,7 +407,7 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }
@@ -29,7 +29,7 @@
#         {
#             "model_name": "gpt-3.5-turbo", # openai model name
#             "litellm_params": { # params for litellm completion/embedding call
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_version": os.getenv("AZURE_API_VERSION"),
#                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -40,7 +40,7 @@
#         {
#             "model_name": "bad-model", # openai model name
#             "litellm_params": { # params for litellm completion/embedding call
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": "bad-key",
#                 "api_version": os.getenv("AZURE_API_VERSION"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -157,7 +157,7 @@ def test_router_mock_request_with_mock_timeout_with_fallbacks():
        {
            "model_name": "azure-gpt",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
@@ -104,12 +104,12 @@ async def test_router_with_caching():
        model_list = [
            {
                "model_name": "azure/gpt-4",
-                "litellm_params": get_azure_params("chatgpt-v-2"),
+                "litellm_params": get_azure_params("chatgpt-v-3"),
                "tpm": 100,
            },
            {
                "model_name": "azure/gpt-4",
-                "litellm_params": get_azure_params("chatgpt-v-2"),
+                "litellm_params": get_azure_params("chatgpt-v-3"),
                "tpm": 1000,
            },
        ]
@@ -107,7 +107,7 @@ async def test_prompt_injection_llm_eval():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -729,7 +729,7 @@ def azure_openai_test_completion():
    try:
        # OVERRIDE WITH DYNAMIC MAX TOKENS
        response_1 = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[
                {
                    "content": "Hello, how are you? Be as verbose as possible",

@@ -743,7 +743,7 @@ def azure_openai_test_completion():

        # USE CONFIG TOKENS
        response_2 = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[
                {
                    "content": "Hello, how are you? Be as verbose as possible",
@@ -266,7 +266,7 @@ def test_router_sensitive_keys():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "special-key",
            },
            "model_info": {"id": 12345},

@@ -334,7 +334,7 @@ async def test_router_retries(sync_mode):
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),

@@ -417,7 +417,7 @@ def test_exception_raising():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -479,7 +479,7 @@ def test_reading_key_from_model_list():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": old_api_key,
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -535,7 +535,7 @@ def test_reading_key_from_model_list():
def test_call_one_endpoint():
    # [PROD TEST CASE]
    # user passes one deployment they want to call on the router, we call the specified one
-    # this test makes a completion calls azure/chatgpt-v-2, it should work
+    # this test makes a completion calls azure/chatgpt-v-3, it should work
    try:
        print("Testing calling a specific deployment")
        old_api_key = os.environ["AZURE_API_KEY"]

@@ -544,7 +544,7 @@ def test_call_one_endpoint():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": old_api_key,
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -574,7 +574,7 @@ def test_call_one_endpoint():

    async def call_azure_completion():
        response = await router.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "hello this request will pass"}],
            specific_deployment=True,
        )

@@ -620,7 +620,7 @@ def test_router_azure_acompletion():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": old_api_key,
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -793,7 +793,7 @@ def test_router_context_window_check_pre_call_check_in_group_custom_model_info()
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -847,7 +847,7 @@ def test_router_context_window_check_pre_call_check():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -901,7 +901,7 @@ def test_router_context_window_check_pre_call_check_out_group():
        {
            "model_name": "gpt-3.5-turbo-small", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -980,7 +980,7 @@ def test_router_region_pre_call_check(allowed_model_region):
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -2616,7 +2616,7 @@ def test_is_team_specific_model():
#         {
#             "model_name": "gpt-3.5-turbo",
#             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
#                 "tpm": 100000,

@@ -2626,7 +2626,7 @@ def test_is_team_specific_model():
#         {
#             "model_name": "gpt-3.5-turbo",
#             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
#                 "tpm": 500,
@@ -74,7 +74,7 @@ async def test_provider_budgets_e2e_test():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -268,7 +268,7 @@ async def test_prometheus_metric_tracking():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -96,7 +96,7 @@ async def test_acompletion_caching_on_router():
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),

@@ -213,7 +213,7 @@ async def test_acompletion_caching_with_ttl_on_router():
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),

@@ -279,7 +279,7 @@ async def test_acompletion_caching_on_router_caching_groups():
        {
            "model_name": "azure-gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),
@@ -43,7 +43,7 @@ async def test_router_init():
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),
@@ -41,7 +41,7 @@ async def test_cooldown_badrequest_error():
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -33,7 +33,7 @@ def test_async_fallbacks(caplog):
        {
            "model_name": "azure/gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -93,7 +93,7 @@ def test_async_fallbacks(caplog):
    # - error request, falling back notice, success notice
    expected_logs = [
        "Falling back to model_group = azure/gpt-3.5-turbo",
-        "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
+        "litellm.acompletion(model=azure/chatgpt-v-3)\x1b[32m 200 OK\x1b[0m",
        "Successful fallback b/w models.",
    ]

@@ -67,7 +67,7 @@ def test_sync_fallbacks():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -78,7 +78,7 @@ def test_sync_fallbacks():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -150,7 +150,7 @@ async def test_async_fallbacks():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -161,7 +161,7 @@ async def test_async_fallbacks():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -349,7 +349,7 @@ def test_dynamic_fallbacks_sync():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -360,7 +360,7 @@ def test_dynamic_fallbacks_sync():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -426,7 +426,7 @@ async def test_dynamic_fallbacks_async():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -437,7 +437,7 @@ async def test_dynamic_fallbacks_async():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -509,7 +509,7 @@ async def test_async_fallbacks_streaming():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@ -520,7 +520,7 @@ async def test_async_fallbacks_streaming():
|
|||
{ # list of model deployments
|
||||
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -594,7 +594,7 @@ def test_sync_fallbacks_streaming():
|
|||
{ # list of model deployments
|
||||
"model_name": "azure/gpt-3.5-turbo", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": "bad-key",
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -605,7 +605,7 @@ def test_sync_fallbacks_streaming():
|
|||
{ # list of model deployments
|
||||
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -675,7 +675,7 @@ async def test_async_fallbacks_max_retries_per_request():
|
|||
{ # list of model deployments
|
||||
"model_name": "azure/gpt-3.5-turbo", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": "bad-key",
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -686,7 +686,7 @@ async def test_async_fallbacks_max_retries_per_request():
|
|||
{ # list of model deployments
|
||||
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -808,13 +808,13 @@ def test_ausage_based_routing_fallbacks():
|
|||
model_list = [
|
||||
{
|
||||
"model_name": "azure/gpt-4-fast",
|
||||
"litellm_params": get_azure_params("chatgpt-v-2"),
|
||||
"litellm_params": get_azure_params("chatgpt-v-3"),
|
||||
"model_info": {"id": 1},
|
||||
"rpm": AZURE_FAST_RPM,
|
||||
},
|
||||
{
|
||||
"model_name": "azure/gpt-4-basic",
|
||||
"litellm_params": get_azure_params("chatgpt-v-2"),
|
||||
"litellm_params": get_azure_params("chatgpt-v-3"),
|
||||
"model_info": {"id": 2},
|
||||
"rpm": AZURE_BASIC_RPM,
|
||||
},
|
||||
|
@ -889,7 +889,7 @@ def test_custom_cooldown_times():
|
|||
{ # list of model deployments
|
||||
"model_name": "gpt-3.5-turbo", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": "bad-key",
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -899,7 +899,7 @@ def test_custom_cooldown_times():
|
|||
{ # list of model deployments
|
||||
"model_name": "gpt-3.5-turbo", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -993,7 +993,7 @@ async def test_service_unavailable_fallbacks(sync_mode):
|
|||
{
|
||||
"model_name": "gpt-3.5-turbo-0125-preview",
|
||||
"litellm_params": {
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
|
|
@@ -41,7 +41,7 @@ def test_weighted_selection_router():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -54,7 +54,7 @@ def test_weighted_selection_router():
         )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]

@@ -64,10 +64,10 @@ def test_weighted_selection_router():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"

         router.reset()
     except Exception as e:

@@ -97,7 +97,7 @@ def test_weighted_selection_router_tpm():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -110,7 +110,7 @@ def test_weighted_selection_router_tpm():
         )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]

@@ -120,10 +120,10 @@ def test_weighted_selection_router_tpm():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"

         router.reset()
     except Exception as e:

@@ -153,7 +153,7 @@ def test_weighted_selection_router_tpm_as_router_param():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -166,7 +166,7 @@ def test_weighted_selection_router_tpm_as_router_param():
         )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]

@@ -176,10 +176,10 @@ def test_weighted_selection_router_tpm_as_router_param():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"

         router.reset()
     except Exception as e:

@@ -210,7 +210,7 @@ def test_weighted_selection_router_rpm_as_router_param():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -224,7 +224,7 @@ def test_weighted_selection_router_rpm_as_router_param():
        )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]

@@ -234,10 +234,10 @@ def test_weighted_selection_router_rpm_as_router_param():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"

         router.reset()
     except Exception as e:

@@ -266,7 +266,7 @@ def test_weighted_selection_router_no_rpm_set():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -286,7 +286,7 @@ def test_weighted_selection_router_no_rpm_set():
         )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("claude-1")
             selected_model_id = selected_model["litellm_params"]["model"]

@@ -296,7 +296,7 @@ def test_weighted_selection_router_no_rpm_set():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
             selection_counts["bedrock/claude1.2"] / total_requests == 1
         ), f"Assertion failed: Selection counts {selection_counts}"

@@ -325,7 +325,7 @@ def test_model_group_aliases():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -358,7 +358,7 @@ def test_model_group_aliases():
         )

         # test that
-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         selection_counts = defaultdict(int)
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")

@@ -369,10 +369,10 @@ def test_model_group_aliases():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"

         router.reset()
     except Exception as e:

@@ -552,7 +552,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -566,7 +566,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
         )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = await router.async_get_available_deployment(
                 "gpt-3.5-turbo", request_kwargs={}

@@ -579,13 +579,13 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
         total_requests = sum(selection_counts.values())

         if rpm_list[0] is not None or tpm_list[0] is not None:
-            # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+            # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
             assert (
-                selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-            ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+                selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+            ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
         else:
             # Assert both are used
-            assert selection_counts["azure/chatgpt-v-2"] > 0
+            assert selection_counts["azure/chatgpt-v-3"] > 0
             assert selection_counts["gpt-3.5-turbo"] > 0
         router.reset()
     except Exception as e:
@@ -40,7 +40,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -96,7 +96,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -134,7 +134,7 @@
 #         {
 #             "model_name": "azure-cloudflare",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1",

@@ -201,7 +201,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -254,7 +254,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -615,7 +615,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -660,7 +660,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -69,7 +69,7 @@ async def test_async_fallbacks():
         { # list of model deployments
             "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -166,7 +166,7 @@ async def test_router_retry_policy(error_type):
         {
             "model_name": "gpt-3.5-turbo", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -175,7 +175,7 @@ async def test_router_retry_policy(error_type):
         {
             "model_name": "bad-model", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -275,7 +275,7 @@ async def test_dynamic_router_retry_policy(model_group):
         {
             "model_name": "gpt-3.5-turbo", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -287,7 +287,7 @@ async def test_dynamic_router_retry_policy(model_group):
         {
             "model_name": "gpt-3.5-turbo", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -299,7 +299,7 @@ async def test_dynamic_router_retry_policy(model_group):
         {
             "model_name": "gpt-3.5-turbo", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -311,7 +311,7 @@ async def test_dynamic_router_retry_policy(model_group):
         {
             "model_name": "bad-model", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -393,7 +393,7 @@ def test_retry_rate_limit_error_with_healthy_deployments():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -426,7 +426,7 @@ def test_do_retry_rate_limit_error_with_no_fallbacks_and_no_healthy_deployments(
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -459,14 +459,14 @@ def test_raise_context_window_exceeded_error():
         llm_provider="azure",
         model="gpt-3.5-turbo",
     )
-    context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-2"]}]
+    context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-3"]}]

     router = Router(
         model_list=[
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),

@@ -508,7 +508,7 @@ def test_raise_context_window_exceeded_error_no_retry():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -562,7 +562,7 @@ def test_timeout_for_rate_limit_error_with_healthy_deployments(
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -589,7 +589,7 @@ def test_timeout_for_rate_limit_error_with_healthy_deployments(
         "litellm_params": {
             "api_key": "my-key",
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com",
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
         },
         "model_info": {
             "id": "0e30bc8a63fa91ae4415d4234e231b3f9e6dd900cac57d118ce13a720d95e9d6",

@@ -615,7 +615,7 @@ def test_timeout_for_rate_limit_error_with_no_healthy_deployments():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -650,7 +650,7 @@ def test_no_retry_for_not_found_error_404():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -709,7 +709,7 @@ def test_no_retry_when_no_healthy_deployments():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -30,7 +30,7 @@ def test_router_timeouts():
         {
             "model_name": "openai-gpt-4",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "os.environ/AZURE_API_KEY",
                 "api_base": "os.environ/AZURE_API_BASE",
                 "api_version": "os.environ/AZURE_API_VERSION",
@@ -32,7 +32,7 @@ def test_returned_settings():
         {
             "model_name": "gpt-3.5-turbo", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -96,7 +96,7 @@ def test_update_kwargs_before_fallbacks_unit_test():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -133,7 +133,7 @@ async def test_update_kwargs_before_fallbacks(call_type):
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -241,7 +241,7 @@ tools_schema = [
 def test_completion_azure_stream_special_char():
     litellm.set_verbose = True
     messages = [{"role": "user", "content": "hi. respond with the <xml> tag only"}]
-    response = completion(model="azure/chatgpt-v-2", messages=messages, stream=True)
+    response = completion(model="azure/chatgpt-v-3", messages=messages, stream=True)
     response_str = ""
     for part in response:
         response_str += part.choices[0].delta.content or ""

@@ -449,7 +449,7 @@ def test_completion_azure_stream():
         },
     ]
     response = completion(
-        model="azure/chatgpt-v-2", messages=messages, stream=True, max_tokens=50
+        model="azure/chatgpt-v-3", messages=messages, stream=True, max_tokens=50
     )
     complete_response = ""
     # Add any assertions here to check the response

@@ -2070,7 +2070,7 @@ def test_openai_chat_completion_complete_response_call():
     "model",
     [
         "gpt-3.5-turbo",
-        "azure/chatgpt-v-2",
+        "azure/chatgpt-v-3",
         "claude-3-haiku-20240307",
         "o1-preview",
         "o1",
@@ -23,7 +23,7 @@ import litellm
     [
         ("gpt-3.5-turbo", "openai"),
         ("anthropic.claude-instant-v1", "bedrock"),
-        ("azure/chatgpt-v-2", "azure"),
+        ("azure/chatgpt-v-3", "azure"),
     ],
 )
 @pytest.mark.parametrize("sync_mode", [True, False])

@@ -104,7 +104,7 @@ def test_hanging_request_azure():
         {
             "model_name": "azure-gpt",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_base": os.environ["AZURE_API_BASE"],
                 "api_key": os.environ["AZURE_API_KEY"],
             },

@@ -158,7 +158,7 @@ def test_hanging_request_openai():
         {
             "model_name": "azure-gpt",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_base": os.environ["AZURE_API_BASE"],
                 "api_key": os.environ["AZURE_API_KEY"],
             },
@@ -45,7 +45,7 @@ def test_tpm_rpm_updated():
     )
     model_group = "gpt-3.5-turbo"
     deployment_id = "1234"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
     total_tokens = 50
     standard_logging_payload: StandardLoggingPayload = create_standard_logging_payload()
     standard_logging_payload["model_group"] = model_group

@@ -100,12 +100,12 @@ def test_get_available_deployments():
     model_list = [
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "1234"},
         },
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "5678"},
         },
     ]

@@ -116,7 +116,7 @@ def test_get_available_deployments():
     ## DEPLOYMENT 1 ##
     total_tokens = 50
     deployment_id = "1234"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
     standard_logging_payload = create_standard_logging_payload()
     standard_logging_payload["model_group"] = model_group
     standard_logging_payload["model_id"] = deployment_id

@@ -721,7 +721,7 @@ async def test_tpm_rpm_routing_model_name_checks():
     deployment = {
         "model_name": "gpt-3.5-turbo",
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": os.getenv("AZURE_API_KEY"),
             "api_base": os.getenv("AZURE_API_BASE"),
             "mock_response": "Hey, how's it going?",

@@ -763,5 +763,5 @@ async def test_tpm_rpm_routing_model_name_checks():

     assert (
         standard_logging_payload["hidden_params"]["litellm_model_name"]
-        == "azure/chatgpt-v-2"
+        == "azure/chatgpt-v-3"
     )
@@ -56,7 +56,7 @@ def test_get_api_base_unit_test(model, optional_params, expected_api_base):
 async def test_get_api_base():
     _pl = ProxyLogging(user_api_key_cache=DualCache())
     _pl.update_values(alerting=["slack"], alerting_threshold=100, redis_cache=None)
-    model = "chatgpt-v-2"
+    model = "chatgpt-v-3"
     messages = [{"role": "user", "content": "Hey how's it going?"}]
     litellm_params = {
         "acompletion": True,
@@ -244,7 +244,7 @@ async def make_async_calls():
     for _ in range(5):
         task = asyncio.create_task(
             litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=[{"role": "user", "content": "This is a test"}],
                 max_tokens=5,
                 temperature=0.7,
@@ -40,7 +40,7 @@ def test_spend_logs_payload(model_id: Optional[str]):

     input_args: dict = {
         "kwargs": {
-            "model": "chatgpt-v-2",
+            "model": "chatgpt-v-3",
             "messages": [
                 {"role": "system", "content": "you are a helpful assistant.\n"},
                 {"role": "user", "content": "bom dia"},

@@ -89,7 +89,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
             },
             "endpoint": "http://localhost:4000/chat/completions",
             "model_group": "gpt-3.5-turbo",
-            "deployment": "azure/chatgpt-v-2",
+            "deployment": "azure/chatgpt-v-3",
             "model_info": {
                 "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                 "db_model": False,

@@ -99,7 +99,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
             "error_information": None,
             "status": "success",
             "proxy_server_request": "{}",
-            "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+            "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
         },
         "model_info": {
             "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",

@@ -158,7 +158,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
         "api_base": "openai-gpt-4-test-v-1.openai.azure.com",
         "acompletion": True,
         "complete_input_dict": {
-            "model": "chatgpt-v-2",
+            "model": "chatgpt-v-3",
             "messages": [
                 {"role": "system", "content": "you are a helpful assistant.\n"},
                 {"role": "user", "content": "bom dia"},
@@ -25,7 +25,7 @@ config = {
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.environ["AZURE_API_KEY"],
                 "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                 "api_version": "2023-07-01-preview",
@@ -9,7 +9,7 @@

 # chat = ChatOpenAI(
 #     openai_api_base="http://0.0.0.0:8000",
-#     model = "azure/chatgpt-v-2",
+#     model = "azure/chatgpt-v-3",
 #     temperature=0.1,
 #     extra_body={
 #         "metadata": {
@@ -39,7 +39,7 @@ client = openai.AzureOpenAI(
 )
 try:
     response = client.chat.completions.create(
-        model="chatgpt-v-2",
+        model="chatgpt-v-3",
         messages=[
             {
                 "role": "user",
@@ -4,7 +4,7 @@ client = openai.OpenAI(api_key="hi", base_url="http://0.0.0.0:8000")

 # # request sent to model set on litellm proxy, `litellm --model`
 response = client.chat.completions.create(
-    model="azure/chatgpt-v-2",
+    model="azure/chatgpt-v-3",
     messages=[
         {"role": "user", "content": "this is a test request, write a short poem"}
     ],
@@ -1,7 +1,7 @@
 model_list:
   - model_name: gpt-4-team1
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
       api_version: "2023-05-15"
       api_key: os.environ/AZURE_API_KEY
@@ -5,12 +5,12 @@ model_list:
       model: gpt-3.5-turbo
   - model_name: working-azure-gpt-3.5-turbo
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
   - model_name: azure-gpt-3.5-turbo
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: bad-key
   - model_name: azure-embedding
@@ -1,7 +1,7 @@
 model_list:
   - model_name: azure-cloudflare
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
       api_key: os.environ/AZURE_API_KEY
       api_version: 2023-07-01-preview
@@ -12,7 +12,7 @@ model_list:
   - litellm_params:
       api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
       api_key: os.environ/AZURE_API_KEY
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
     model_name: azure-cloudflare-model
   - litellm_params:
       api_base: https://openai-france-1234.openai.azure.com
@@ -1,7 +1,7 @@
 model_list:
   - model_name: Azure OpenAI GPT-4 Canada
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
       api_version: "2023-07-01-preview"
@@ -1546,7 +1546,7 @@ def test_call_with_key_over_budget(prisma_client):
         )
         await proxy_db_logger._PROXY_track_cost_callback(
             kwargs={
-                "model": "chatgpt-v-2",
+                "model": "chatgpt-v-3",
                 "stream": False,
                 "litellm_params": {
                     "metadata": {

@@ -1578,7 +1578,7 @@ def test_call_with_key_over_budget(prisma_client):

         assert spend_log.request_id == request_id
         assert spend_log.spend == float("2e-05")
-        assert spend_log.model == "chatgpt-v-2"
+        assert spend_log.model == "chatgpt-v-3"
         assert (
             spend_log.cache_key
             == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"

@@ -1669,7 +1669,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
         proxy_db_logger = _ProxyDBLogger()
         await proxy_db_logger._PROXY_track_cost_callback(
             kwargs={
-                "model": "chatgpt-v-2",
+                "model": "chatgpt-v-3",
                 "stream": False,
                 "litellm_params": {
                     "metadata": {

@@ -1702,7 +1702,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):

         assert spend_log.request_id == request_id
         assert spend_log.spend == float("2e-05")
-        assert spend_log.model == "chatgpt-v-2"
+        assert spend_log.model == "chatgpt-v-3"
         assert (
             spend_log.cache_key
             == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"

@@ -1757,7 +1757,7 @@ async def test_call_with_key_over_model_budget(

     try:

-        # set budget for chatgpt-v-2 to 0.000001, expect the next request to fail
+        # set budget for chatgpt-v-3 to 0.000001, expect the next request to fail
         model_max_budget = {
             "gpt-4o-mini": {
                 "budget_limit": "0.000001",

@@ -1898,7 +1898,7 @@ async def test_call_with_key_never_over_budget(prisma_client):
         )
         await proxy_db_logger._PROXY_track_cost_callback(
             kwargs={
-                "model": "chatgpt-v-2",
+                "model": "chatgpt-v-3",
                 "stream": False,
                 "litellm_params": {
                     "metadata": {

@@ -1987,7 +1987,7 @@ async def test_call_with_key_over_budget_stream(prisma_client):
         await proxy_db_logger._PROXY_track_cost_callback(
             kwargs={
                 "call_type": "acompletion",
-                "model": "sagemaker-chatgpt-v-2",
+                "model": "sagemaker-chatgpt-v-3",
                 "stream": True,
                 "complete_streaming_response": resp,
                 "litellm_params": {

@@ -2431,7 +2431,7 @@ async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
     await proxy_db_logger._PROXY_track_cost_callback(
         kwargs={
             "call_type": "acompletion",
-            "model": "sagemaker-chatgpt-v-2",
+            "model": "sagemaker-chatgpt-v-3",
             "stream": True,
             "complete_streaming_response": resp,
             "litellm_params": {
@@ -164,7 +164,7 @@ def test_chat_completion(client):
         my_custom_logger.async_success == True
     ) # checks if the status of async_success is True, only the async_log_success_event can set this to true
     assert (
-        my_custom_logger.async_completion_kwargs["model"] == "chatgpt-v-2"
+        my_custom_logger.async_completion_kwargs["model"] == "chatgpt-v-3"
     ) # checks if kwargs passed to async_log_success_event are correct
     print(
         "\n\n Custom Logger Async Completion args",
@@ -64,7 +64,7 @@ def test_chat_completion(client_no_auth):
         ModelConfig(
             model_name="user-azure-instance",
             litellm_params=CompletionRequest(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 api_key=os.getenv("AZURE_API_KEY"),
                 api_version=os.getenv("AZURE_API_VERSION"),
                 api_base=os.getenv("AZURE_API_BASE"),
@@ -446,7 +446,7 @@ def test_chat_completion_azure(mock_acompletion, client_no_auth):
     try:
         # Your test data
         test_data = {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [
                 {"role": "user", "content": "write 1 sentence poem"},
             ],

@@ -457,7 +457,7 @@ def test_chat_completion_azure(mock_acompletion, client_no_auth):
         response = client_no_auth.post("/v1/chat/completions", json=test_data)

         mock_acompletion.assert_called_once_with(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[
                 {"role": "user", "content": "write 1 sentence poem"},
             ],

@@ -489,19 +489,19 @@ def test_openai_deployments_model_chat_completions_azure(
     try:
         # Your test data
         test_data = {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [
                 {"role": "user", "content": "write 1 sentence poem"},
             ],
             "max_tokens": 10,
         }

-        url = "/openai/deployments/azure/chatgpt-v-2/chat/completions"
+        url = "/openai/deployments/azure/chatgpt-v-3/chat/completions"
         print(f"testing proxy server with Azure Request {url}")
         response = client_no_auth.post(url, json=test_data)

         mock_acompletion.assert_called_once_with(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[
                 {"role": "user", "content": "write 1 sentence poem"},
             ],

@@ -1314,7 +1314,7 @@ async def test_add_callback_via_key(prisma_client):
     try:
         # Your test data
         test_data = {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [
                 {"role": "user", "content": "write 1 sentence poem"},
             ],

@@ -1408,7 +1408,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(
     request._url = URL(url="/chat/completions")

     test_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [
             {"role": "user", "content": "write 1 sentence poem"},
         ],

@@ -1423,7 +1423,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(

     data = {
         "data": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [{"role": "user", "content": "write 1 sentence poem"}],
             "max_tokens": 10,
             "mock_response": "Hello world",

@@ -1523,7 +1523,7 @@ async def test_disable_fallbacks_by_key(disable_fallbacks_set):

     key_metadata = {"disable_fallbacks": disable_fallbacks_set}
     existing_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [{"role": "user", "content": "write 1 sentence poem"}],
     }
     data = LiteLLMProxyRequestSetup.add_key_level_controls(

@@ -1564,7 +1564,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket(
     request._url = URL(url="/chat/completions")

     test_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [
             {"role": "user", "content": "write 1 sentence poem"},
         ],

@@ -1579,7 +1579,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket(

     data = {
         "data": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [{"role": "user", "content": "write 1 sentence poem"}],
             "max_tokens": 10,
             "mock_response": "Hello world",

@@ -1697,7 +1697,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith(
     request._url = URL(url="/chat/completions")

     test_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [
             {"role": "user", "content": "write 1 sentence poem"},
         ],

@@ -1712,7 +1712,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith(

     data = {
         "data": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [{"role": "user", "content": "write 1 sentence poem"}],
             "max_tokens": 10,
             "mock_response": "Hello world",
@@ -171,7 +171,7 @@
 # model_data = {
 #     "model_name": "azure-model",
 #     "litellm_params": {
-#         "model": "azure/chatgpt-v-2",
+#         "model": "azure/chatgpt-v-3",
 #         "api_key": os.getenv("AZURE_API_KEY"),
 #         "api_base": os.getenv("AZURE_API_BASE"),
 #         "api_version": os.getenv("AZURE_API_VERSION")
@@ -67,7 +67,7 @@ async def add_models(session, model_id="123", model_name="azure-gpt-3.5", key="s
     data = {
         "model_name": model_name,
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": "os.environ/AZURE_API_KEY",
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "api_version": "2023-05-15",

@@ -100,7 +100,7 @@ async def update_model(session, model_id="123", model_name="azure-gpt-3.5", key=
     data = {
         "model_name": model_name,
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": "os.environ/AZURE_API_KEY",
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "api_version": "2023-05-15",

@@ -292,7 +292,7 @@ async def add_model_for_health_checking(session, model_id="123"):
     data = {
         "model_name": f"azure-model-health-check-{model_id}",
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": os.getenv("AZURE_API_KEY"),
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "api_version": "2023-05-15",

@@ -417,7 +417,7 @@ async def test_add_model_run_health():

     assert _health_info["healthy_count"] == 1
     assert (
-        _healthy_endpooint["model"] == "azure/chatgpt-v-2"
+        _healthy_endpooint["model"] == "azure/chatgpt-v-3"
     ) # this is the model that got added

     # assert httpx client is is unchanges
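Every hunk above makes the same mechanical change: the Azure deployment name "chatgpt-v-2" becomes "chatgpt-v-3" wherever a test registers a deployment. A minimal sketch of the updated pattern (assumptions: AZURE_API_KEY, AZURE_API_BASE, and AZURE_API_VERSION are set in the environment, and "chatgpt-v-3" exists as a deployment on that Azure resource, as the tests in this commit expect):

import os
from litellm import Router

# One deployment, registered under the public alias the tests call.
router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",  # alias used in router.completion(...)
            "litellm_params": {
                "model": "azure/chatgpt-v-3",  # new Azure deployment name
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),
            },
        }
    ]
)

# Requests address the alias; the router resolves it to the Azure deployment.
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
print(response.choices[0].message.content)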