From f7dd68803564779318f320fbfdfe4e5345ab89eb Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Fri, 18 Apr 2025 08:42:12 -0700
Subject: [PATCH 1/5] test: handle cohere rbac issue (verified happens on
 calling azure directly)

---
 tests/local_testing/test_completion.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
index d56475e1a0..041a49cb99 100644
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@@ -131,15 +131,15 @@ def test_null_role_response():
 
         assert response.choices[0].message.role == "assistant"
 
-
+@pytest.mark.skip(reason="Cohere having RBAC issues")
 def test_completion_azure_command_r():
     try:
-        litellm.set_verbose = True
+        litellm._turn_on_debug()
 
         response = completion(
             model="azure/command-r-plus",
-            api_base=os.getenv("AZURE_COHERE_API_BASE"),
-            api_key=os.getenv("AZURE_COHERE_API_KEY"),
+            api_base=os.getenv("AZURE_COHERE_API_BASE"),
+            api_key=os.getenv("AZURE_COHERE_API_KEY"),
             messages=[{"role": "user", "content": "What is the meaning of life?"}],
         )
 

From de3c2d14bff66048c313d5768d46e3577982176a Mon Sep 17 00:00:00 2001
From: David Emmanuel <davidemmanuel75@gmail.com>
Date: Fri, 18 Apr 2025 17:44:46 +0100
Subject: [PATCH 2/5] Add Gemini Flash 2.5 Preview Model Price and Context
 Window  (#10125)

* Update model_prices_and_context_window_backup.json

* Update model_prices_and_context_window.json
---
 ...odel_prices_and_context_window_backup.json | 27 +++++++++++++++++++
 model_prices_and_context_window.json          | 27 +++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 8a10b97b1e..4b0d657e05 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -5168,6 +5168,33 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.5-flash-preview-04-17": {
+        "max_tokens": 65536,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 65536,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.0000001,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000060,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 10,
+        "tpm": 250000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "supported_modalities": ["text", "image", "audio", "video"],
+        "supported_output_modalities": ["text"],
+        "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
+    },
     "gemini-2.0-flash": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 8a10b97b1e..4b0d657e05 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -5168,6 +5168,33 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.5-flash-preview-04-17": {
+        "max_tokens": 65536,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 65536,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.0000001,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000060,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 10,
+        "tpm": 250000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "supported_modalities": ["text", "image", "audio", "video"],
+        "supported_output_modalities": ["text"],
+        "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
+    },
     "gemini-2.0-flash": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,

From 415abfc222a01bac7e67e3c547ef64421022c23d Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Fri, 18 Apr 2025 13:13:58 -0700
Subject: [PATCH 3/5] test: update test

---
 tests/local_testing/test_completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
index 041a49cb99..2294ca4f0f 100644
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@@ -1332,7 +1332,7 @@ def test_completion_fireworks_ai():
             },
         ]
         response = completion(
-            model="fireworks_ai/mixtral-8x7b-instruct",
+            model="fireworks_ai/llama4-maverick-instruct-basic",
             messages=messages,
         )
         print(response)

From 1ea046cc617f277e6d7d526645b0392a5f1ad149 Mon Sep 17 00:00:00 2001
From: Krish Dholakia <krrishdholakia@gmail.com>
Date: Fri, 18 Apr 2025 14:22:12 -0700
Subject: [PATCH 4/5] test: update tests to new deployment model (#10142)

* test: update tests to new deployment model

* test: update model name

* test: skip cohere rbac issue test

* test: update test - replace gpt-4o model
---
 tests/llm_translation/test_azure_ai.py        | 23 ++++---
 tests/llm_translation/test_azure_openai.py    |  8 +--
 tests/llm_translation/test_optional_params.py |  4 +-
 tests/llm_translation/test_rerank.py          |  1 +
 tests/load_tests/test_datadog_load_test.py    |  2 +-
 tests/load_tests/test_otel_load_test.py       |  2 +-
 .../example_config_yaml/azure_config.yaml     |  2 +-
 tests/local_testing/test_acooldowns_router.py | 10 +--
 tests/local_testing/test_alangfuse.py         |  2 +-
 tests/local_testing/test_assistants.py        |  2 +-
 tests/local_testing/test_azure_openai.py      |  4 +-
 tests/local_testing/test_azure_perf.py        |  4 +-
 tests/local_testing/test_caching.py           | 14 ++--
 tests/local_testing/test_caching_ssl.py       |  2 +-
 tests/local_testing/test_class.py             |  4 +-
 tests/local_testing/test_completion.py        | 34 +++++-----
 tests/local_testing/test_config.py            |  6 +-
 .../test_configs/test_bad_config.yaml         |  4 +-
 ...st_cloudflare_azure_with_cache_config.yaml |  2 +-
 .../test_configs/test_config_no_auth.yaml     |  2 +-
 .../test_configs/test_custom_logger.yaml      |  2 +-
 .../test_custom_callback_input.py             | 22 +++---
 .../test_custom_callback_router.py            | 10 +--
 tests/local_testing/test_custom_logger.py     |  6 +-
 tests/local_testing/test_exceptions.py        | 22 +++---
 tests/local_testing/test_gcs_bucket.py        | 12 ++--
 tests/local_testing/test_health_check.py      |  2 +-
 .../test_helicone_integration.py              |  2 +-
 .../local_testing/test_least_busy_routing.py  |  6 +-
 .../local_testing/test_load_test_router_s3.py |  2 +-
 tests/local_testing/test_loadtest_router.py   |  2 +-
 .../local_testing/test_lowest_cost_routing.py |  2 +-
 .../test_lowest_latency_routing.py            | 28 ++++----
 tests/local_testing/test_mem_usage.py         |  4 +-
 tests/local_testing/test_mock_request.py      |  2 +-
 .../local_testing/test_prometheus_service.py  |  4 +-
 .../test_prompt_injection_detection.py        |  2 +-
 .../test_provider_specific_config.py          |  4 +-
 tests/local_testing/test_router.py            | 28 ++++----
 .../test_router_budget_limiter.py             |  4 +-
 tests/local_testing/test_router_caching.py    |  6 +-
 .../local_testing/test_router_client_init.py  |  2 +-
 tests/local_testing/test_router_cooldowns.py  |  2 +-
 tests/local_testing/test_router_debug_logs.py |  4 +-
 tests/local_testing/test_router_fallbacks.py  | 38 +++++------
 .../test_router_get_deployments.py            | 68 +++++++++----------
 tests/local_testing/test_router_init.py       | 14 ++--
 .../test_router_policy_violation.py           |  2 +-
 tests/local_testing/test_router_retries.py    | 32 ++++-----
 tests/local_testing/test_router_timeout.py    |  2 +-
 tests/local_testing/test_router_utils.py      |  6 +-
 tests/local_testing/test_streaming.py         |  6 +-
 tests/local_testing/test_timeout.py           |  6 +-
 .../local_testing/test_tpm_rpm_routing_v2.py  | 12 ++--
 tests/logging_callback_tests/test_alerting.py |  2 +-
 .../test_amazing_s3_logs.py                   |  2 +-
 .../logging_callback_tests/test_spend_logs.py |  8 +--
 tests/old_proxy_tests/tests/load_test_q.py    |  2 +-
 .../tests/test_langchain_request.py           |  2 +-
 .../tests/test_openai_exception_request.py    |  2 +-
 .../tests/test_openai_request.py              |  2 +-
 .../example_config_yaml/azure_config.yaml     |  2 +-
 .../test_configs/test_bad_config.yaml         |  4 +-
 ...st_cloudflare_azure_with_cache_config.yaml |  2 +-
 .../test_configs/test_config_no_auth.yaml     |  2 +-
 .../test_configs/test_custom_logger.yaml      |  2 +-
 .../test_key_generate_prisma.py               | 16 ++---
 .../test_proxy_custom_logger.py               |  2 +-
 .../test_proxy_pass_user_config.py            |  2 +-
 tests/proxy_unit_tests/test_proxy_server.py   | 26 +++----
 .../test_proxy_server_keys.py                 |  2 +-
 tests/test_models.py                          |  8 +--
 72 files changed, 294 insertions(+), 292 deletions(-)

diff --git a/tests/llm_translation/test_azure_ai.py b/tests/llm_translation/test_azure_ai.py
index 6ec2050638..62f68b02d3 100644
--- a/tests/llm_translation/test_azure_ai.py
+++ b/tests/llm_translation/test_azure_ai.py
@@ -14,7 +14,7 @@ from litellm.llms.anthropic.chat import ModelResponseIterator
 import httpx
 import json
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
-from base_rerank_unit_tests import BaseLLMRerankTest
+# from base_rerank_unit_tests import BaseLLMRerankTest
 
 load_dotenv()
 import io
@@ -255,16 +255,17 @@ def test_azure_deepseek_reasoning_content():
         assert response.choices[0].message.content == "\n\nThe sky is a canvas of blue"
 
 
-class TestAzureAIRerank(BaseLLMRerankTest):
-    def get_custom_llm_provider(self) -> litellm.LlmProviders:
-        return litellm.LlmProviders.AZURE_AI
+# skipping due to cohere rbac issues
+# class TestAzureAIRerank(BaseLLMRerankTest):
+#     def get_custom_llm_provider(self) -> litellm.LlmProviders:
+#         return litellm.LlmProviders.AZURE_AI
 
-    def get_base_rerank_call_args(self) -> dict:
-        return {
-            "model": "azure_ai/cohere-rerank-v3-english",
-            "api_base": os.getenv("AZURE_AI_COHERE_API_BASE"),
-            "api_key": os.getenv("AZURE_AI_COHERE_API_KEY"),
-        }
+#     def get_base_rerank_call_args(self) -> dict:
+#         return {
+#             "model": "azure_ai/cohere-rerank-v3-english",
+#             "api_base": os.getenv("AZURE_AI_COHERE_API_BASE"),
+#             "api_key": os.getenv("AZURE_AI_COHERE_API_KEY"),
+#         }
 
 
 @pytest.mark.asyncio
@@ -279,7 +280,7 @@ async def test_azure_ai_request_format():
 
     # Set up the test parameters
     api_key = os.getenv("AZURE_API_KEY")
-    api_base = f"{os.getenv('AZURE_API_BASE')}/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview"
+    api_base = f"{os.getenv('AZURE_API_BASE')}/openai/deployments/gpt-4o-new-test/chat/completions?api-version=2024-08-01-preview"
     model = "azure_ai/gpt-4o"
     messages = [
         {"role": "user", "content": "hi"},
diff --git a/tests/llm_translation/test_azure_openai.py b/tests/llm_translation/test_azure_openai.py
index d289c892a0..72ea3ec27e 100644
--- a/tests/llm_translation/test_azure_openai.py
+++ b/tests/llm_translation/test_azure_openai.py
@@ -137,7 +137,7 @@ def test_azure_extra_headers(input, call_type, header_value):
                 func = image_generation
 
             data = {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com",
                 "api_version": "2023-07-01-preview",
                 "api_key": "my-azure-api-key",
@@ -339,7 +339,7 @@ def test_azure_gpt_4o_with_tool_call_and_response_format(api_version):
 
     with patch.object(client.chat.completions.with_raw_response, "create") as mock_post:
         response = litellm.completion(
-            model="azure/gpt-4o",
+            model="azure/gpt-4o-new-test",
             messages=[
                 {
                     "role": "system",
@@ -474,7 +474,7 @@ def test_azure_max_retries_0(
 
     try:
         completion(
-            model="azure/gpt-4o",
+            model="azure/gpt-4o-new-test",
             messages=[{"role": "user", "content": "Hello world"}],
             max_retries=max_retries,
             stream=stream,
@@ -502,7 +502,7 @@ async def test_async_azure_max_retries_0(
 
     try:
         await acompletion(
-            model="azure/gpt-4o",
+            model="azure/gpt-4o-new-test",
             messages=[{"role": "user", "content": "Hello world"}],
             max_retries=max_retries,
             stream=stream,
diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py
index 4545ebbe20..e207e367e4 100644
--- a/tests/llm_translation/test_optional_params.py
+++ b/tests/llm_translation/test_optional_params.py
@@ -217,7 +217,7 @@ def test_openai_optional_params_embeddings():
 def test_azure_optional_params_embeddings():
     litellm.drop_params = True
     optional_params = get_optional_params_embeddings(
-        model="chatgpt-v-2",
+        model="chatgpt-v-3",
         user="John",
         encoding_format=None,
         custom_llm_provider="azure",
@@ -396,7 +396,7 @@ def test_azure_tool_choice(api_version):
     """
     litellm.drop_params = True
     optional_params = litellm.utils.get_optional_params(
-        model="chatgpt-v-2",
+        model="chatgpt-v-3",
         user="John",
         custom_llm_provider="azure",
         max_tokens=10,
diff --git a/tests/llm_translation/test_rerank.py b/tests/llm_translation/test_rerank.py
index 5de6c1a8ec..e9d9e38951 100644
--- a/tests/llm_translation/test_rerank.py
+++ b/tests/llm_translation/test_rerank.py
@@ -150,6 +150,7 @@ async def test_basic_rerank_together_ai(sync_mode):
 
 @pytest.mark.asyncio()
 @pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.skip(reason="Skipping test due to Cohere RBAC issues")
 async def test_basic_rerank_azure_ai(sync_mode):
     import os
 
diff --git a/tests/load_tests/test_datadog_load_test.py b/tests/load_tests/test_datadog_load_test.py
index b56c82288e..f4328b71b1 100644
--- a/tests/load_tests/test_datadog_load_test.py
+++ b/tests/load_tests/test_datadog_load_test.py
@@ -91,7 +91,7 @@ async def make_async_calls(metadata=None, **completion_kwargs):
 def create_async_task(**completion_kwargs):
     litellm.set_verbose = True
     completion_args = {
-        "model": "openai/chatgpt-v-2",
+        "model": "openai/chatgpt-v-3",
         "api_version": "2024-02-01",
         "messages": [{"role": "user", "content": "This is a test"}],
         "max_tokens": 5,
diff --git a/tests/load_tests/test_otel_load_test.py b/tests/load_tests/test_otel_load_test.py
index 50e5748686..f5754c0c40 100644
--- a/tests/load_tests/test_otel_load_test.py
+++ b/tests/load_tests/test_otel_load_test.py
@@ -86,7 +86,7 @@ def create_async_task(**completion_kwargs):
     By default a standard set of arguments are used for the litellm.acompletion function.
     """
     completion_args = {
-        "model": "openai/chatgpt-v-2",
+        "model": "openai/chatgpt-v-3",
         "api_version": "2024-02-01",
         "messages": [{"role": "user", "content": "This is a test" * 100}],
         "max_tokens": 5,
diff --git a/tests/local_testing/example_config_yaml/azure_config.yaml b/tests/local_testing/example_config_yaml/azure_config.yaml
index fd5865cd7c..111813c884 100644
--- a/tests/local_testing/example_config_yaml/azure_config.yaml
+++ b/tests/local_testing/example_config_yaml/azure_config.yaml
@@ -1,7 +1,7 @@
 model_list:
   - model_name: gpt-4-team1
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
       api_version: "2023-05-15"
       api_key:  os.environ/AZURE_API_KEY
diff --git a/tests/local_testing/test_acooldowns_router.py b/tests/local_testing/test_acooldowns_router.py
index df3f493a68..8427fd2be8 100644
--- a/tests/local_testing/test_acooldowns_router.py
+++ b/tests/local_testing/test_acooldowns_router.py
@@ -26,7 +26,7 @@ model_list = [
     {  # list of model deployments
         "model_name": "gpt-3.5-turbo",  # openai model name
         "litellm_params": {  # params for litellm completion/embedding call
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": "bad-key",
             "api_version": os.getenv("AZURE_API_VERSION"),
             "api_base": os.getenv("AZURE_API_BASE"),
@@ -143,7 +143,7 @@ async def test_cooldown_same_model_name(sync_mode):
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -153,7 +153,7 @@ async def test_cooldown_same_model_name(sync_mode):
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -184,7 +184,7 @@ async def test_cooldown_same_model_name(sync_mode):
                 model_ids.append(model["model_info"]["id"])
             print("\n litellm model ids ", model_ids)
 
-            # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+            # example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960']
             assert (
                 model_ids[0] != model_ids[1]
             )  # ensure both models have a uuid added, and they have different names
@@ -201,7 +201,7 @@ async def test_cooldown_same_model_name(sync_mode):
                 model_ids.append(model["model_info"]["id"])
             print("\n litellm model ids ", model_ids)
 
-            # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+            # example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960']
             assert (
                 model_ids[0] != model_ids[1]
             )  # ensure both models have a uuid added, and they have different names
diff --git a/tests/local_testing/test_alangfuse.py b/tests/local_testing/test_alangfuse.py
index cdcf18f79f..2eea426478 100644
--- a/tests/local_testing/test_alangfuse.py
+++ b/tests/local_testing/test_alangfuse.py
@@ -194,7 +194,7 @@ def create_async_task(**completion_kwargs):
     By default a standard set of arguments are used for the litellm.acompletion function.
     """
     completion_args = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "api_version": "2024-02-01",
         "messages": [{"role": "user", "content": "This is a test"}],
         "max_tokens": 5,
diff --git a/tests/local_testing/test_assistants.py b/tests/local_testing/test_assistants.py
index 544523e4a0..d5755f2aba 100644
--- a/tests/local_testing/test_assistants.py
+++ b/tests/local_testing/test_assistants.py
@@ -71,7 +71,7 @@ async def test_create_delete_assistants(provider, sync_mode):
     model = "gpt-4-turbo"
     if provider == "azure":
         os.environ["AZURE_API_VERSION"] = "2024-05-01-preview"
-        model = "chatgpt-v-2"
+        model = "chatgpt-v-3"
 
     if sync_mode == True:
         assistant = litellm.create_assistants(
diff --git a/tests/local_testing/test_azure_openai.py b/tests/local_testing/test_azure_openai.py
index 4ca2a20011..8e2d2aa509 100644
--- a/tests/local_testing/test_azure_openai.py
+++ b/tests/local_testing/test_azure_openai.py
@@ -46,7 +46,7 @@ async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "tenant_id": os.getenv("AZURE_TENANT_ID"),
                     "client_id": os.getenv("AZURE_CLIENT_ID"),
@@ -95,6 +95,6 @@ async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):
 
         assert json_body == {
             "messages": [{"role": "user", "content": "Hello world!"}],
-            "model": "chatgpt-v-2",
+            "model": "chatgpt-v-3",
             "stream": False,
         }
diff --git a/tests/local_testing/test_azure_perf.py b/tests/local_testing/test_azure_perf.py
index b7d7abd553..bc6d694b78 100644
--- a/tests/local_testing/test_azure_perf.py
+++ b/tests/local_testing/test_azure_perf.py
@@ -18,7 +18,7 @@
 #     {
 #         "model_name": "azure-test",
 #         "litellm_params": {
-#             "model": "azure/chatgpt-v-2",
+#             "model": "azure/chatgpt-v-3",
 #             "api_key": os.getenv("AZURE_API_KEY"),
 #             "api_base": os.getenv("AZURE_API_BASE"),
 #             "api_version": os.getenv("AZURE_API_VERSION"),
@@ -33,7 +33,7 @@
 #     try:
 #         start_time = time.time()
 #         response = await client.chat.completions.create(
-#             model="chatgpt-v-2",
+#             model="chatgpt-v-3",
 #             messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
 #             stream=True,
 #         )
diff --git a/tests/local_testing/test_caching.py b/tests/local_testing/test_caching.py
index 43dafd7293..df0b625d7d 100644
--- a/tests/local_testing/test_caching.py
+++ b/tests/local_testing/test_caching.py
@@ -324,7 +324,7 @@ def test_caching_with_models_v2():
     litellm.set_verbose = True
     response1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
     response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
-    response3 = completion(model="azure/chatgpt-v-2", messages=messages, caching=True)
+    response3 = completion(model="azure/chatgpt-v-3", messages=messages, caching=True)
     print(f"response1: {response1}")
     print(f"response2: {response2}")
     print(f"response3: {response3}")
@@ -1170,7 +1170,7 @@ async def test_s3_cache_stream_azure(sync_mode):
 
         if sync_mode:
             response1 = litellm.completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=messages,
                 max_tokens=40,
                 temperature=1,
@@ -1183,7 +1183,7 @@ async def test_s3_cache_stream_azure(sync_mode):
             print(response_1_content)
         else:
             response1 = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=messages,
                 max_tokens=40,
                 temperature=1,
@@ -1203,7 +1203,7 @@ async def test_s3_cache_stream_azure(sync_mode):
 
         if sync_mode:
             response2 = litellm.completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=messages,
                 max_tokens=40,
                 temperature=1,
@@ -1216,7 +1216,7 @@ async def test_s3_cache_stream_azure(sync_mode):
             print(response_2_content)
         else:
             response2 = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=messages,
                 max_tokens=40,
                 temperature=1,
@@ -1279,7 +1279,7 @@ async def test_s3_cache_acompletion_azure():
         print("s3 Cache: test for caching, streaming + completion")
 
         response1 = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=messages,
             max_tokens=40,
             temperature=1,
@@ -1289,7 +1289,7 @@ async def test_s3_cache_acompletion_azure():
         time.sleep(2)
 
         response2 = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=messages,
             max_tokens=40,
             temperature=1,
diff --git a/tests/local_testing/test_caching_ssl.py b/tests/local_testing/test_caching_ssl.py
index 1b642f7674..8194115ef1 100644
--- a/tests/local_testing/test_caching_ssl.py
+++ b/tests/local_testing/test_caching_ssl.py
@@ -58,7 +58,7 @@ def test_caching_router():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_class.py b/tests/local_testing/test_class.py
index a15f362372..e6b711efe8 100644
--- a/tests/local_testing/test_class.py
+++ b/tests/local_testing/test_class.py
@@ -55,7 +55,7 @@
 # #             {
 # #                 "model_name": "gpt-3.5-turbo",  # openai model name
 # #                 "litellm_params": {  # params for litellm completion/embedding call
-# #                     "model": "azure/chatgpt-v-2",
+# #                     "model": "azure/chatgpt-v-3",
 # #                     "api_key": os.getenv("AZURE_API_KEY"),
 # #                     "api_version": os.getenv("AZURE_API_VERSION"),
 # #                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -93,7 +93,7 @@
 # #             {
 # #                 "model_name": "gpt-3.5-turbo",  # openai model name
 # #                 "litellm_params": {  # params for litellm completion/embedding call
-# #                     "model": "azure/chatgpt-v-2",
+# #                     "model": "azure/chatgpt-v-3",
 # #                     "api_key": os.getenv("AZURE_API_KEY"),
 # #                     "api_version": os.getenv("AZURE_API_VERSION"),
 # #                     "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
index 2294ca4f0f..9f573662f7 100644
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@@ -732,7 +732,7 @@ def encode_image(image_path):
     "model",
     [
         "gpt-4o",
-        "azure/gpt-4o",
+        "azure/gpt-4o-new-test",
         "anthropic/claude-3-opus-20240229",
     ],
 )  #
@@ -1824,9 +1824,9 @@ def test_completion_openai():
     "model, api_version",
     [
         # ("gpt-4o-2024-08-06", None),
-        # ("azure/chatgpt-v-2", None),
+        # ("azure/chatgpt-v-3", None),
         ("bedrock/anthropic.claude-3-sonnet-20240229-v1:0", None),
-        # ("azure/gpt-4o", "2024-08-01-preview"),
+        # ("azure/gpt-4o-new-test", "2024-08-01-preview"),
     ],
 )
 @pytest.mark.flaky(retries=3, delay=1)
@@ -2495,7 +2495,7 @@ def test_completion_azure_extra_headers():
         litellm.client_session = http_client
         try:
             response = completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=messages,
                 api_base=os.getenv("AZURE_API_BASE"),
                 api_version="2023-07-01-preview",
@@ -2544,7 +2544,7 @@ def test_completion_azure_ad_token():
         litellm.client_session = http_client
         try:
             response = completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=messages,
                 azure_ad_token="my-special-token",
             )
@@ -2575,7 +2575,7 @@ def test_completion_azure_key_completion_arg():
         litellm.set_verbose = True
         ## Test azure call
         response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=messages,
             api_key=old_key,
             logprobs=True,
@@ -2633,7 +2633,7 @@ async def test_re_use_azure_async_client():
         ## Test azure call
         for _ in range(3):
             response = await litellm.acompletion(
-                model="azure/chatgpt-v-2", messages=messages, client=client
+                model="azure/chatgpt-v-3", messages=messages, client=client
             )
             print(f"response: {response}")
     except Exception as e:
@@ -2665,7 +2665,7 @@ def test_completion_azure():
         litellm.set_verbose = False
         ## Test azure call
         response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=messages,
             api_key="os.environ/AZURE_API_KEY",
         )
@@ -2673,7 +2673,7 @@ def test_completion_azure():
         print(f"response hidden params: {response._hidden_params}")
         ## Test azure flag for backwards-compat
         # response = completion(
-        #     model="chatgpt-v-2",
+        #     model="chatgpt-v-3",
         #     messages=messages,
         #     azure=True,
         #     max_tokens=10
@@ -2712,7 +2712,7 @@ def test_azure_openai_ad_token():
     litellm.input_callback = [tester]
     try:
         response = litellm.completion(
-            model="azure/chatgpt-v-2",  # e.g. gpt-35-instant
+            model="azure/chatgpt-v-3",  # e.g. gpt-35-instant
             messages=[
                 {
                     "role": "user",
@@ -2750,7 +2750,7 @@ def test_completion_azure2():
 
         ## Test azure call
         response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=messages,
             api_base=api_base,
             api_key=api_key,
@@ -2787,7 +2787,7 @@ def test_completion_azure3():
 
         ## Test azure call
         response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=messages,
             max_tokens=10,
         )
@@ -2835,7 +2835,7 @@ def test_completion_azure_with_litellm_key():
         openai.api_key = "ymca"
 
         response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=messages,
         )
         # Add any assertions here to check the response
@@ -2863,7 +2863,7 @@ def test_completion_azure_deployment_id():
     try:
         litellm.set_verbose = True
         response = completion(
-            deployment_id="chatgpt-v-2",
+            deployment_id="chatgpt-v-3",
             model="gpt-3.5-turbo",
             messages=messages,
         )
@@ -3925,7 +3925,7 @@ def test_completion_stream_watsonx():
 @pytest.mark.parametrize(
     "provider, model, project, region_name, token",
     [
-        ("azure", "chatgpt-v-2", None, None, "test-token"),
+        ("azure", "chatgpt-v-3", None, None, "test-token"),
         ("vertex_ai", "anthropic-claude-3", "adroit-crow-1", "us-east1", None),
         ("watsonx", "ibm/granite", "96946574", "dallas", "1234"),
         ("bedrock", "anthropic.claude-3", None, "us-east-1", None),
@@ -4178,7 +4178,7 @@ async def test_completion_ai21_chat():
 
 @pytest.mark.parametrize(
     "model",
-    ["gpt-4o", "azure/chatgpt-v-2"],
+    ["gpt-4o", "azure/chatgpt-v-3"],
 )
 @pytest.mark.parametrize(
     "stream",
@@ -4200,7 +4200,7 @@ def test_completion_response_ratelimit_headers(model, stream):
     assert "x-ratelimit-remaining-requests" in additional_headers
     assert "x-ratelimit-remaining-tokens" in additional_headers
 
-    if model == "azure/chatgpt-v-2":
+    if model == "azure/chatgpt-v-3":
         # Azure OpenAI header
         assert "llm_provider-azureml-model-session" in additional_headers
     if model == "claude-3-sonnet-20240229":
diff --git a/tests/local_testing/test_config.py b/tests/local_testing/test_config.py
index ab8365b2d1..b56be32881 100644
--- a/tests/local_testing/test_config.py
+++ b/tests/local_testing/test_config.py
@@ -46,7 +46,7 @@ async def test_delete_deployment():
     import base64
 
     litellm_params = LiteLLM_Params(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
         api_key=os.getenv("AZURE_API_KEY"),
         api_base=os.getenv("AZURE_API_BASE"),
         api_version=os.getenv("AZURE_API_VERSION"),
@@ -232,7 +232,7 @@ async def test_db_error_new_model_check():
 
 
 litellm_params = LiteLLM_Params(
-    model="azure/chatgpt-v-2",
+    model="azure/chatgpt-v-3",
     api_key=os.getenv("AZURE_API_KEY"),
     api_base=os.getenv("AZURE_API_BASE"),
     api_version=os.getenv("AZURE_API_VERSION"),
@@ -250,7 +250,7 @@ def _create_model_list(flag_value: Literal[0, 1], master_key: str):
     import base64
 
     new_litellm_params = LiteLLM_Params(
-        model="azure/chatgpt-v-2-3",
+        model="azure/chatgpt-v-3-3",
         api_key=os.getenv("AZURE_API_KEY"),
         api_base=os.getenv("AZURE_API_BASE"),
         api_version=os.getenv("AZURE_API_VERSION"),
diff --git a/tests/local_testing/test_configs/test_bad_config.yaml b/tests/local_testing/test_configs/test_bad_config.yaml
index 7c802a8408..0a16ecb3c5 100644
--- a/tests/local_testing/test_configs/test_bad_config.yaml
+++ b/tests/local_testing/test_configs/test_bad_config.yaml
@@ -5,12 +5,12 @@ model_list:
       model: gpt-3.5-turbo
   - model_name: working-azure-gpt-3.5-turbo
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
   - model_name: azure-gpt-3.5-turbo
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: bad-key
   - model_name: azure-embedding
diff --git a/tests/local_testing/test_configs/test_cloudflare_azure_with_cache_config.yaml b/tests/local_testing/test_configs/test_cloudflare_azure_with_cache_config.yaml
index c3c3cb1c32..aeadbeb872 100644
--- a/tests/local_testing/test_configs/test_cloudflare_azure_with_cache_config.yaml
+++ b/tests/local_testing/test_configs/test_cloudflare_azure_with_cache_config.yaml
@@ -1,7 +1,7 @@
 model_list:
   - model_name: azure-cloudflare
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
       api_key: os.environ/AZURE_API_KEY
       api_version: 2023-07-01-preview
diff --git a/tests/local_testing/test_configs/test_config_no_auth.yaml b/tests/local_testing/test_configs/test_config_no_auth.yaml
index 1c5ddf2266..075bf7a09d 100644
--- a/tests/local_testing/test_configs/test_config_no_auth.yaml
+++ b/tests/local_testing/test_configs/test_config_no_auth.yaml
@@ -12,7 +12,7 @@ model_list:
 - litellm_params:
     api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
     api_key: os.environ/AZURE_API_KEY
-    model: azure/chatgpt-v-2
+    model: azure/chatgpt-v-3
   model_name: azure-cloudflare-model
 - litellm_params:
     api_base: https://openai-france-1234.openai.azure.com
diff --git a/tests/local_testing/test_configs/test_custom_logger.yaml b/tests/local_testing/test_configs/test_custom_logger.yaml
index 145c618edd..2ad500b36f 100644
--- a/tests/local_testing/test_configs/test_custom_logger.yaml
+++ b/tests/local_testing/test_configs/test_custom_logger.yaml
@@ -1,7 +1,7 @@
 model_list: 
   - model_name: Azure OpenAI GPT-4 Canada
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
       api_version: "2023-07-01-preview"
diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py
index 222572935b..055ed821d0 100644
--- a/tests/local_testing/test_custom_callback_input.py
+++ b/tests/local_testing/test_custom_callback_input.py
@@ -450,12 +450,12 @@ def test_chat_azure_stream():
         customHandler = CompletionCustomHandler()
         litellm.callbacks = [customHandler]
         response = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
         )
         # test streaming
         response = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
             stream=True,
         )
@@ -464,7 +464,7 @@ def test_chat_azure_stream():
         # test failure callback
         try:
             response = litellm.completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
                 api_key="my-bad-key",
                 stream=True,
@@ -491,12 +491,12 @@ async def test_async_chat_azure_stream():
         customHandler = CompletionCustomHandler()
         litellm.callbacks = [customHandler]
         response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
         )
         ## test streaming
         response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
             stream=True,
         )
@@ -507,7 +507,7 @@ async def test_async_chat_azure_stream():
         # test failure callback
         try:
             response = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
                 api_key="my-bad-key",
                 stream=True,
@@ -1018,7 +1018,7 @@ async def test_async_completion_azure_caching():
     litellm.callbacks = [customHandler_caching]
     unique_time = time.time()
     response1 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
         messages=[
             {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
         ],
@@ -1027,7 +1027,7 @@ async def test_async_completion_azure_caching():
     await asyncio.sleep(1)
     print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
     response2 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
         messages=[
             {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
         ],
@@ -1056,7 +1056,7 @@ async def test_async_completion_azure_caching_streaming():
     litellm.callbacks = [customHandler_caching]
     unique_time = uuid.uuid4()
     response1 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
         messages=[
             {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
         ],
@@ -1069,7 +1069,7 @@ async def test_async_completion_azure_caching_streaming():
     initial_customhandler_caching_states = len(customHandler_caching.states)
     print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
     response2 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
         messages=[
             {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
         ],
@@ -1207,7 +1207,7 @@ def test_turn_off_message_logging():
     "model",
     [
         "ft:gpt-3.5-turbo:my-org:custom_suffix:id"
-    ],  # "gpt-3.5-turbo", "azure/chatgpt-v-2",
+    ],  # "gpt-3.5-turbo", "azure/chatgpt-v-3",
 )
 @pytest.mark.parametrize(
     "turn_off_message_logging",
diff --git a/tests/local_testing/test_custom_callback_router.py b/tests/local_testing/test_custom_callback_router.py
index 310a497922..83289abf5f 100644
--- a/tests/local_testing/test_custom_callback_router.py
+++ b/tests/local_testing/test_custom_callback_router.py
@@ -284,7 +284,7 @@ class CompletionCustomHandler(
             )
 
             if (
-                kwargs["model"] == "chatgpt-v-2"
+                kwargs["model"] == "chatgpt-v-3"
                 and base_model is not None
                 and kwargs["stream"] != True
             ):
@@ -394,7 +394,7 @@ async def test_async_chat_azure():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -438,7 +438,7 @@ async def test_async_chat_azure():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "my-bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -545,7 +545,7 @@ async def test_async_chat_azure_with_fallbacks():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "my-bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -606,7 +606,7 @@ async def test_async_completion_azure_caching():
         {
             "model_name": "gpt-3.5-turbo",  # openai model name
             "litellm_params": {  # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_custom_logger.py b/tests/local_testing/test_custom_logger.py
index d9eb50eb73..ba9973e11d 100644
--- a/tests/local_testing/test_custom_logger.py
+++ b/tests/local_testing/test_custom_logger.py
@@ -160,7 +160,7 @@ def test_completion_azure_stream_moderation_failure():
         ]
         try:
             response = completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=messages,
                 mock_response="Exception: content_filter_policy",
                 stream=True,
@@ -195,7 +195,7 @@ def test_async_custom_handler_stream():
         async def test_1():
             nonlocal complete_streaming_response
             response = await litellm.acompletion(
-                model="azure/chatgpt-v-2", messages=messages, stream=True
+                model="azure/chatgpt-v-3", messages=messages, stream=True
             )
             async for chunk in response:
                 complete_streaming_response += (
@@ -239,7 +239,7 @@ def test_azure_completion_stream():
         complete_streaming_response = ""
 
         response = litellm.completion(
-            model="azure/chatgpt-v-2", messages=messages, stream=True
+            model="azure/chatgpt-v-3", messages=messages, stream=True
         )
         for chunk in response:
             complete_streaming_response += chunk["choices"][0]["delta"]["content"] or ""
diff --git a/tests/local_testing/test_exceptions.py b/tests/local_testing/test_exceptions.py
index 229ea07c7a..be7710f58a 100644
--- a/tests/local_testing/test_exceptions.py
+++ b/tests/local_testing/test_exceptions.py
@@ -51,7 +51,7 @@ async def test_content_policy_exception_azure():
         # this is ony a test - we needed some way to invoke the exception :(
         litellm.set_verbose = True
         response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[{"role": "user", "content": "where do I buy lethal drugs from"}],
             mock_response="Exception: content_filter_policy",
         )
@@ -124,7 +124,7 @@ def test_context_window_with_fallbacks(model):
     ctx_window_fallback_dict = {
         "command-nightly": "claude-2.1",
         "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k",
-        "azure/chatgpt-v-2": "gpt-3.5-turbo-16k",
+        "azure/chatgpt-v-3": "gpt-3.5-turbo-16k",
     }
     sample_text = "how does a court case get to the Supreme Court?" * 1000
     messages = [{"content": sample_text, "role": "user"}]
@@ -161,7 +161,7 @@ def invalid_auth(model):  # set the model key to an invalid key, depending on th
             os.environ["AWS_REGION_NAME"] = "bad-key"
             temporary_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"]
             os.environ["AWS_SECRET_ACCESS_KEY"] = "bad-key"
-        elif model == "azure/chatgpt-v-2":
+        elif model == "azure/chatgpt-v-3":
             temporary_key = os.environ["AZURE_API_KEY"]
             os.environ["AZURE_API_KEY"] = "bad-key"
         elif model == "claude-3-5-haiku-20241022":
@@ -262,7 +262,7 @@ def test_completion_azure_exception():
         old_azure_key = os.environ["AZURE_API_KEY"]
         os.environ["AZURE_API_KEY"] = "good morning"
         response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[{"role": "user", "content": "hello"}],
         )
         os.environ["AZURE_API_KEY"] = old_azure_key
@@ -309,7 +309,7 @@ async def asynctest_completion_azure_exception():
         old_azure_key = os.environ["AZURE_API_KEY"]
         os.environ["AZURE_API_KEY"] = "good morning"
         response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[{"role": "user", "content": "hello"}],
         )
         print(f"response: {response}")
@@ -528,7 +528,7 @@ def test_content_policy_violation_error_streaming():
     async def test_get_response():
         try:
             response = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=[{"role": "user", "content": "say 1"}],
                 temperature=0,
                 top_p=1,
@@ -557,7 +557,7 @@ def test_content_policy_violation_error_streaming():
     async def test_get_error():
         try:
             response = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=[
                     {"role": "user", "content": "where do i buy lethal drugs from"}
                 ],
@@ -754,7 +754,7 @@ def test_litellm_predibase_exception():
 #     return False
 # # Repeat each model 500 times
 # # extended_models = [model for model in models for _ in range(250)]
-# extended_models = ["azure/chatgpt-v-2" for _ in range(250)]
+# extended_models = ["azure/chatgpt-v-3" for _ in range(250)]
 
 # def worker(model):
 #     return test_model_call(model)
@@ -934,7 +934,7 @@ def _pre_call_utils_httpx(
         ("openai", "gpt-3.5-turbo", "chat_completion", False),
         ("openai", "gpt-3.5-turbo", "chat_completion", True),
         ("openai", "gpt-3.5-turbo-instruct", "completion", True),
-        ("azure", "azure/chatgpt-v-2", "chat_completion", True),
+        ("azure", "azure/chatgpt-v-3", "chat_completion", True),
         ("azure", "azure/text-embedding-ada-002", "embedding", True),
         ("azure", "azure_text/gpt-3.5-turbo-instruct", "completion", True),
     ],
@@ -1158,7 +1158,7 @@ async def test_exception_with_headers_httpx(
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("model", ["azure/chatgpt-v-2", "openai/gpt-3.5-turbo"])
+@pytest.mark.parametrize("model", ["azure/chatgpt-v-3", "openai/gpt-3.5-turbo"])
 async def test_bad_request_error_contains_httpx_response(model):
     """
     Test that the BadRequestError contains the httpx response
@@ -1209,7 +1209,7 @@ def test_context_window_exceeded_error_from_litellm_proxy():
 
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.parametrize("stream_mode", [True, False])
-@pytest.mark.parametrize("model", ["azure/gpt-4o"])  # "gpt-4o-mini",
+@pytest.mark.parametrize("model", ["azure/gpt-4o-new-test"])  # "gpt-4o-mini",
 @pytest.mark.asyncio
 async def test_exception_bubbling_up(sync_mode, stream_mode, model):
     """
diff --git a/tests/local_testing/test_gcs_bucket.py b/tests/local_testing/test_gcs_bucket.py
index b64475c227..0004fae7c4 100644
--- a/tests/local_testing/test_gcs_bucket.py
+++ b/tests/local_testing/test_gcs_bucket.py
@@ -108,14 +108,14 @@ async def test_aaabasic_gcs_logger():
             },
             "endpoint": "http://localhost:4000/chat/completions",
             "model_group": "gpt-3.5-turbo",
-            "deployment": "azure/chatgpt-v-2",
+            "deployment": "azure/chatgpt-v-3",
             "model_info": {
                 "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                 "db_model": False,
             },
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "caching_groups": None,
-            "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+            "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
         },
     )
 
@@ -216,14 +216,14 @@ async def test_basic_gcs_logger_failure():
                 },
                 "endpoint": "http://localhost:4000/chat/completions",
                 "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
                 "model_info": {
                     "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                     "db_model": False,
                 },
                 "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                 "caching_groups": None,
-                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
             },
         )
     except Exception:
@@ -626,14 +626,14 @@ async def test_basic_gcs_logger_with_folder_in_bucket_name():
             },
             "endpoint": "http://localhost:4000/chat/completions",
             "model_group": "gpt-3.5-turbo",
-            "deployment": "azure/chatgpt-v-2",
+            "deployment": "azure/chatgpt-v-3",
             "model_info": {
                 "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                 "db_model": False,
             },
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "caching_groups": None,
-            "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+            "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
         },
     )
 
diff --git a/tests/local_testing/test_health_check.py b/tests/local_testing/test_health_check.py
index 809cd1ccbd..bf326d884b 100644
--- a/tests/local_testing/test_health_check.py
+++ b/tests/local_testing/test_health_check.py
@@ -20,7 +20,7 @@ import litellm
 async def test_azure_health_check():
     response = await litellm.ahealth_check(
         model_params={
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [{"role": "user", "content": "Hey, how's it going?"}],
             "api_key": os.getenv("AZURE_API_KEY"),
             "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_helicone_integration.py b/tests/local_testing/test_helicone_integration.py
index 968a9aa5b1..3a6fa0309b 100644
--- a/tests/local_testing/test_helicone_integration.py
+++ b/tests/local_testing/test_helicone_integration.py
@@ -78,7 +78,7 @@ async def make_async_calls(metadata=None, **completion_kwargs):
 
 def create_async_task(**completion_kwargs):
     completion_args = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "api_version": "2024-02-01",
         "messages": [{"role": "user", "content": "This is a test"}],
         "max_tokens": 5,
diff --git a/tests/local_testing/test_least_busy_routing.py b/tests/local_testing/test_least_busy_routing.py
index cf69f596d9..7e4393da0b 100644
--- a/tests/local_testing/test_least_busy_routing.py
+++ b/tests/local_testing/test_least_busy_routing.py
@@ -33,7 +33,7 @@ def test_model_added():
         "litellm_params": {
             "metadata": {
                 "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
             },
             "model_info": {"id": "1234"},
         }
@@ -47,7 +47,7 @@ def test_get_available_deployments():
     test_cache = DualCache()
     least_busy_logger = LeastBusyLoggingHandler(router_cache=test_cache, model_list=[])
     model_group = "gpt-3.5-turbo"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
     kwargs = {
         "litellm_params": {
             "metadata": {
@@ -113,7 +113,7 @@ async def test_router_get_available_deployments(async_test):
     router.leastbusy_logger.test_flag = True
 
     model_group = "azure-model"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
     request_count_dict = {1: 10, 2: 54, 3: 100}
     cache_key = f"{model_group}_request_count"
     if async_test is True:
diff --git a/tests/local_testing/test_load_test_router_s3.py b/tests/local_testing/test_load_test_router_s3.py
index 3a022ae991..3a2567b686 100644
--- a/tests/local_testing/test_load_test_router_s3.py
+++ b/tests/local_testing/test_load_test_router_s3.py
@@ -46,7 +46,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
diff --git a/tests/local_testing/test_loadtest_router.py b/tests/local_testing/test_loadtest_router.py
index a12a45b514..0d8a09ca62 100644
--- a/tests/local_testing/test_loadtest_router.py
+++ b/tests/local_testing/test_loadtest_router.py
@@ -38,7 +38,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
diff --git a/tests/local_testing/test_lowest_cost_routing.py b/tests/local_testing/test_lowest_cost_routing.py
index 4e3105b5ff..caca007052 100644
--- a/tests/local_testing/test_lowest_cost_routing.py
+++ b/tests/local_testing/test_lowest_cost_routing.py
@@ -60,7 +60,7 @@ async def test_get_available_deployments_custom_price():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "input_cost_per_token": 0.00003,
                 "output_cost_per_token": 0.00003,
             },
diff --git a/tests/local_testing/test_lowest_latency_routing.py b/tests/local_testing/test_lowest_latency_routing.py
index 4234490982..74dae25c1f 100644
--- a/tests/local_testing/test_lowest_latency_routing.py
+++ b/tests/local_testing/test_lowest_latency_routing.py
@@ -48,7 +48,7 @@ async def test_latency_memory_leak(sync_mode):
         "litellm_params": {
             "metadata": {
                 "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
             },
             "model_info": {"id": deployment_id},
         }
@@ -130,7 +130,7 @@ def test_latency_updated():
         "litellm_params": {
             "metadata": {
                 "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
             },
             "model_info": {"id": deployment_id},
         }
@@ -173,7 +173,7 @@ def test_latency_updated_custom_ttl():
         "litellm_params": {
             "metadata": {
                 "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
             },
             "model_info": {"id": deployment_id},
         }
@@ -200,12 +200,12 @@ def test_get_available_deployments():
     model_list = [
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "1234"},
         },
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "5678"},
         },
     ]
@@ -219,7 +219,7 @@ def test_get_available_deployments():
         "litellm_params": {
             "metadata": {
                 "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
             },
             "model_info": {"id": deployment_id},
         }
@@ -240,7 +240,7 @@ def test_get_available_deployments():
         "litellm_params": {
             "metadata": {
                 "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
             },
             "model_info": {"id": deployment_id},
         }
@@ -275,7 +275,7 @@ async def _deploy(lowest_latency_logger, deployment_id, tokens_used, duration):
         "litellm_params": {
             "metadata": {
                 "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
             },
             "model_info": {"id": deployment_id},
         }
@@ -317,12 +317,12 @@ def test_get_available_endpoints_tpm_rpm_check_async(ans_rpm):
     model_list = [
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "1234", "rpm": ans_rpm},
         },
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "5678", "rpm": non_ans_rpm},
         },
     ]
@@ -366,12 +366,12 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
     model_list = [
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "1234", "rpm": ans_rpm},
         },
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "5678", "rpm": non_ans_rpm},
         },
     ]
@@ -385,7 +385,7 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
         "litellm_params": {
             "metadata": {
                 "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
             },
             "model_info": {"id": deployment_id},
         }
@@ -407,7 +407,7 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
         "litellm_params": {
             "metadata": {
                 "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
             },
             "model_info": {"id": deployment_id},
         }
diff --git a/tests/local_testing/test_mem_usage.py b/tests/local_testing/test_mem_usage.py
index 4a804b4033..9f18fb1e2d 100644
--- a/tests/local_testing/test_mem_usage.py
+++ b/tests/local_testing/test_mem_usage.py
@@ -29,7 +29,7 @@
 #     {
 #         "model_name": "gpt-3.5-turbo",  # openai model name
 #         "litellm_params": {  # params for litellm completion/embedding call
-#             "model": "azure/chatgpt-v-2",
+#             "model": "azure/chatgpt-v-3",
 #             "api_key": os.getenv("AZURE_API_KEY"),
 #             "api_version": os.getenv("AZURE_API_VERSION"),
 #             "api_base": os.getenv("AZURE_API_BASE"),
@@ -40,7 +40,7 @@
 #     {
 #         "model_name": "bad-model",  # openai model name
 #         "litellm_params": {  # params for litellm completion/embedding call
-#             "model": "azure/chatgpt-v-2",
+#             "model": "azure/chatgpt-v-3",
 #             "api_key": "bad-key",
 #             "api_version": os.getenv("AZURE_API_VERSION"),
 #             "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_mock_request.py b/tests/local_testing/test_mock_request.py
index 6842767d9d..6a9c5239f4 100644
--- a/tests/local_testing/test_mock_request.py
+++ b/tests/local_testing/test_mock_request.py
@@ -157,7 +157,7 @@ def test_router_mock_request_with_mock_timeout_with_fallbacks():
             {
                 "model_name": "azure-gpt",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                 },
diff --git a/tests/local_testing/test_prometheus_service.py b/tests/local_testing/test_prometheus_service.py
index c640532a07..cfbd6a1a83 100644
--- a/tests/local_testing/test_prometheus_service.py
+++ b/tests/local_testing/test_prometheus_service.py
@@ -104,12 +104,12 @@ async def test_router_with_caching():
         model_list = [
             {
                 "model_name": "azure/gpt-4",
-                "litellm_params": get_azure_params("chatgpt-v-2"),
+                "litellm_params": get_azure_params("chatgpt-v-3"),
                 "tpm": 100,
             },
             {
                 "model_name": "azure/gpt-4",
-                "litellm_params": get_azure_params("chatgpt-v-2"),
+                "litellm_params": get_azure_params("chatgpt-v-3"),
                 "tpm": 1000,
             },
         ]
diff --git a/tests/local_testing/test_prompt_injection_detection.py b/tests/local_testing/test_prompt_injection_detection.py
index c493a37227..8443aadcc6 100644
--- a/tests/local_testing/test_prompt_injection_detection.py
+++ b/tests/local_testing/test_prompt_injection_detection.py
@@ -107,7 +107,7 @@ async def test_prompt_injection_llm_eval():
                 {
                     "model_name": "gpt-3.5-turbo",  # openai model name
                     "litellm_params": {  # params for litellm completion/embedding call
-                        "model": "azure/chatgpt-v-2",
+                        "model": "azure/chatgpt-v-3",
                         "api_key": os.getenv("AZURE_API_KEY"),
                         "api_version": os.getenv("AZURE_API_VERSION"),
                         "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_provider_specific_config.py b/tests/local_testing/test_provider_specific_config.py
index fc382bd3e9..8fc4c6ec21 100644
--- a/tests/local_testing/test_provider_specific_config.py
+++ b/tests/local_testing/test_provider_specific_config.py
@@ -729,7 +729,7 @@ def azure_openai_test_completion():
     try:
         # OVERRIDE WITH DYNAMIC MAX TOKENS
         response_1 = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[
                 {
                     "content": "Hello, how are you? Be as verbose as possible",
@@ -743,7 +743,7 @@ def azure_openai_test_completion():
 
         # USE CONFIG TOKENS
         response_2 = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[
                 {
                     "content": "Hello, how are you? Be as verbose as possible",
diff --git a/tests/local_testing/test_router.py b/tests/local_testing/test_router.py
index 13eaeb09ab..eb845559e2 100644
--- a/tests/local_testing/test_router.py
+++ b/tests/local_testing/test_router.py
@@ -266,7 +266,7 @@ def test_router_sensitive_keys():
                 {
                     "model_name": "gpt-3.5-turbo",  # openai model name
                     "litellm_params": {  # params for litellm completion/embedding call
-                        "model": "azure/chatgpt-v-2",
+                        "model": "azure/chatgpt-v-3",
                         "api_key": "special-key",
                     },
                     "model_info": {"id": 12345},
@@ -334,7 +334,7 @@ async def test_router_retries(sync_mode):
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
@@ -417,7 +417,7 @@ def test_exception_raising():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -479,7 +479,7 @@ def test_reading_key_from_model_list():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": old_api_key,
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -535,7 +535,7 @@ def test_reading_key_from_model_list():
 def test_call_one_endpoint():
     # [PROD TEST CASE]
     # user passes one deployment they want to call on the router, we call the specified one
-    # this test makes a completion calls azure/chatgpt-v-2, it should work
+    # this test makes a completion call to azure/chatgpt-v-3; it should work
     try:
         print("Testing calling a specific deployment")
         old_api_key = os.environ["AZURE_API_KEY"]
@@ -544,7 +544,7 @@ def test_call_one_endpoint():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": old_api_key,
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -574,7 +574,7 @@ def test_call_one_endpoint():
 
         async def call_azure_completion():
             response = await router.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=[{"role": "user", "content": "hello this request will pass"}],
                 specific_deployment=True,
             )
@@ -620,7 +620,7 @@ def test_router_azure_acompletion():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": old_api_key,
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -793,7 +793,7 @@ def test_router_context_window_check_pre_call_check_in_group_custom_model_info()
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -847,7 +847,7 @@ def test_router_context_window_check_pre_call_check():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -901,7 +901,7 @@ def test_router_context_window_check_pre_call_check_out_group():
             {
                 "model_name": "gpt-3.5-turbo-small",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -980,7 +980,7 @@ def test_router_region_pre_call_check(allowed_model_region):
         {
             "model_name": "gpt-3.5-turbo",  # openai model name
             "litellm_params": {  # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -2616,7 +2616,7 @@ def test_is_team_specific_model():
 #             {
 #                 "model_name": "gpt-3.5-turbo",
 #                 "litellm_params": {
-#                     "model": "azure/chatgpt-v-2",
+#                     "model": "azure/chatgpt-v-3",
 #                     "api_key": os.getenv("AZURE_API_KEY"),
 #                     "api_base": os.getenv("AZURE_API_BASE"),
 #                     "tpm": 100000,
@@ -2626,7 +2626,7 @@ def test_is_team_specific_model():
 #             {
 #                 "model_name": "gpt-3.5-turbo",
 #                 "litellm_params": {
-#                     "model": "azure/chatgpt-v-2",
+#                     "model": "azure/chatgpt-v-3",
 #                     "api_key": os.getenv("AZURE_API_KEY"),
 #                     "api_base": os.getenv("AZURE_API_BASE"),
 #                     "tpm": 500,
diff --git a/tests/local_testing/test_router_budget_limiter.py b/tests/local_testing/test_router_budget_limiter.py
index 8d4948f8f9..9c20b6d098 100644
--- a/tests/local_testing/test_router_budget_limiter.py
+++ b/tests/local_testing/test_router_budget_limiter.py
@@ -74,7 +74,7 @@ async def test_provider_budgets_e2e_test():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -268,7 +268,7 @@ async def test_prometheus_metric_tracking():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_router_caching.py b/tests/local_testing/test_router_caching.py
index 53a79b9434..574f133ace 100644
--- a/tests/local_testing/test_router_caching.py
+++ b/tests/local_testing/test_router_caching.py
@@ -96,7 +96,7 @@ async def test_acompletion_caching_on_router():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
@@ -213,7 +213,7 @@ async def test_acompletion_caching_with_ttl_on_router():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
@@ -279,7 +279,7 @@ async def test_acompletion_caching_on_router_caching_groups():
             {
                 "model_name": "azure-gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
diff --git a/tests/local_testing/test_router_client_init.py b/tests/local_testing/test_router_client_init.py
index 1440dfecaa..42fc49a4c7 100644
--- a/tests/local_testing/test_router_client_init.py
+++ b/tests/local_testing/test_router_client_init.py
@@ -43,7 +43,7 @@ async def test_router_init():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
diff --git a/tests/local_testing/test_router_cooldowns.py b/tests/local_testing/test_router_cooldowns.py
index 80ceb33c01..8428200109 100644
--- a/tests/local_testing/test_router_cooldowns.py
+++ b/tests/local_testing/test_router_cooldowns.py
@@ -41,7 +41,7 @@ async def test_cooldown_badrequest_error():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_router_debug_logs.py b/tests/local_testing/test_router_debug_logs.py
index ba59a3c2fd..bce09404d8 100644
--- a/tests/local_testing/test_router_debug_logs.py
+++ b/tests/local_testing/test_router_debug_logs.py
@@ -33,7 +33,7 @@ def test_async_fallbacks(caplog):
         {
             "model_name": "azure/gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -93,7 +93,7 @@ def test_async_fallbacks(caplog):
     # - error request, falling back notice, success notice
     expected_logs = [
         "Falling back to model_group = azure/gpt-3.5-turbo",
-        "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
+        "litellm.acompletion(model=azure/chatgpt-v-3)\x1b[32m 200 OK\x1b[0m",
         "Successful fallback b/w models.",
     ]
 
diff --git a/tests/local_testing/test_router_fallbacks.py b/tests/local_testing/test_router_fallbacks.py
index 576ad0fcaa..ced2c4dd9e 100644
--- a/tests/local_testing/test_router_fallbacks.py
+++ b/tests/local_testing/test_router_fallbacks.py
@@ -67,7 +67,7 @@ def test_sync_fallbacks():
             {  # list of model deployments
                 "model_name": "azure/gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -78,7 +78,7 @@ def test_sync_fallbacks():
             {  # list of model deployments
                 "model_name": "azure/gpt-3.5-turbo-context-fallback",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -150,7 +150,7 @@ async def test_async_fallbacks():
         {  # list of model deployments
             "model_name": "azure/gpt-3.5-turbo",  # openai model name
             "litellm_params": {  # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -161,7 +161,7 @@ async def test_async_fallbacks():
         {  # list of model deployments
             "model_name": "azure/gpt-3.5-turbo-context-fallback",  # openai model name
             "litellm_params": {  # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -349,7 +349,7 @@ def test_dynamic_fallbacks_sync():
             {  # list of model deployments
                 "model_name": "azure/gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -360,7 +360,7 @@ def test_dynamic_fallbacks_sync():
             {  # list of model deployments
                 "model_name": "azure/gpt-3.5-turbo-context-fallback",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -426,7 +426,7 @@ async def test_dynamic_fallbacks_async():
             {  # list of model deployments
                 "model_name": "azure/gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -437,7 +437,7 @@ async def test_dynamic_fallbacks_async():
             {  # list of model deployments
                 "model_name": "azure/gpt-3.5-turbo-context-fallback",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -509,7 +509,7 @@ async def test_async_fallbacks_streaming():
         {  # list of model deployments
             "model_name": "azure/gpt-3.5-turbo",  # openai model name
             "litellm_params": {  # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -520,7 +520,7 @@ async def test_async_fallbacks_streaming():
         {  # list of model deployments
             "model_name": "azure/gpt-3.5-turbo-context-fallback",  # openai model name
             "litellm_params": {  # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -594,7 +594,7 @@ def test_sync_fallbacks_streaming():
             {  # list of model deployments
                 "model_name": "azure/gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -605,7 +605,7 @@ def test_sync_fallbacks_streaming():
             {  # list of model deployments
                 "model_name": "azure/gpt-3.5-turbo-context-fallback",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -675,7 +675,7 @@ async def test_async_fallbacks_max_retries_per_request():
         {  # list of model deployments
             "model_name": "azure/gpt-3.5-turbo",  # openai model name
             "litellm_params": {  # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -686,7 +686,7 @@ async def test_async_fallbacks_max_retries_per_request():
         {  # list of model deployments
             "model_name": "azure/gpt-3.5-turbo-context-fallback",  # openai model name
             "litellm_params": {  # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -808,13 +808,13 @@ def test_ausage_based_routing_fallbacks():
         model_list = [
             {
                 "model_name": "azure/gpt-4-fast",
-                "litellm_params": get_azure_params("chatgpt-v-2"),
+                "litellm_params": get_azure_params("chatgpt-v-3"),
                 "model_info": {"id": 1},
                 "rpm": AZURE_FAST_RPM,
             },
             {
                 "model_name": "azure/gpt-4-basic",
-                "litellm_params": get_azure_params("chatgpt-v-2"),
+                "litellm_params": get_azure_params("chatgpt-v-3"),
                 "model_info": {"id": 2},
                 "rpm": AZURE_BASIC_RPM,
             },
@@ -889,7 +889,7 @@ def test_custom_cooldown_times():
             {  # list of model deployments
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -899,7 +899,7 @@ def test_custom_cooldown_times():
             {  # list of model deployments
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -993,7 +993,7 @@ async def test_service_unavailable_fallbacks(sync_mode):
             {
                 "model_name": "gpt-3.5-turbo-0125-preview",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_router_get_deployments.py b/tests/local_testing/test_router_get_deployments.py
index efbb5d16e7..ff88824d4a 100644
--- a/tests/local_testing/test_router_get_deployments.py
+++ b/tests/local_testing/test_router_get_deployments.py
@@ -41,7 +41,7 @@ def test_weighted_selection_router():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
@@ -54,7 +54,7 @@ def test_weighted_selection_router():
         )
         selection_counts = defaultdict(int)
 
-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]
@@ -64,10 +64,10 @@ def test_weighted_selection_router():
 
         total_requests = sum(selection_counts.values())
 
-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
 
         router.reset()
     except Exception as e:
@@ -97,7 +97,7 @@ def test_weighted_selection_router_tpm():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
@@ -110,7 +110,7 @@ def test_weighted_selection_router_tpm():
         )
         selection_counts = defaultdict(int)
 
-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]
@@ -120,10 +120,10 @@ def test_weighted_selection_router_tpm():
 
         total_requests = sum(selection_counts.values())
 
-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
 
         router.reset()
     except Exception as e:
@@ -153,7 +153,7 @@ def test_weighted_selection_router_tpm_as_router_param():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
@@ -166,7 +166,7 @@ def test_weighted_selection_router_tpm_as_router_param():
         )
         selection_counts = defaultdict(int)
 
-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]
@@ -176,10 +176,10 @@ def test_weighted_selection_router_tpm_as_router_param():
 
         total_requests = sum(selection_counts.values())
 
-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
 
         router.reset()
     except Exception as e:
@@ -210,7 +210,7 @@ def test_weighted_selection_router_rpm_as_router_param():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
@@ -224,7 +224,7 @@ def test_weighted_selection_router_rpm_as_router_param():
         )
         selection_counts = defaultdict(int)
 
-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]
@@ -234,10 +234,10 @@ def test_weighted_selection_router_rpm_as_router_param():
 
         total_requests = sum(selection_counts.values())
 
-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
 
         router.reset()
     except Exception as e:
@@ -266,7 +266,7 @@ def test_weighted_selection_router_no_rpm_set():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
@@ -286,7 +286,7 @@ def test_weighted_selection_router_no_rpm_set():
         )
         selection_counts = defaultdict(int)
 
-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times for "claude-1"; it should pick bedrock/claude1.2 every time
         for _ in range(1000):
             selected_model = router.get_available_deployment("claude-1")
             selected_model_id = selected_model["litellm_params"]["model"]
@@ -296,7 +296,7 @@ def test_weighted_selection_router_no_rpm_set():
 
         total_requests = sum(selection_counts.values())
 
-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'bedrock/claude1.2' receives 100% of the total requests
         assert (
             selection_counts["bedrock/claude1.2"] / total_requests == 1
         ), f"Assertion failed: Selection counts {selection_counts}"
@@ -325,7 +325,7 @@ def test_model_group_aliases():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
@@ -358,7 +358,7 @@ def test_model_group_aliases():
                 )
 
         # test that
-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         selection_counts = defaultdict(int)
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
@@ -369,10 +369,10 @@ def test_model_group_aliases():
 
         total_requests = sum(selection_counts.values())
 
-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
 
         router.reset()
     except Exception as e:
@@ -552,7 +552,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
@@ -566,7 +566,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
         )
         selection_counts = defaultdict(int)
 
-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = await router.async_get_available_deployment(
                 "gpt-3.5-turbo", request_kwargs={}
@@ -579,13 +579,13 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
         total_requests = sum(selection_counts.values())
 
         if rpm_list[0] is not None or tpm_list[0] is not None:
-            # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+            # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
             assert (
-                selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-            ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+                selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+            ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
         else:
             # Assert both are used
-            assert selection_counts["azure/chatgpt-v-2"] > 0
+            assert selection_counts["azure/chatgpt-v-3"] > 0
             assert selection_counts["gpt-3.5-turbo"] > 0
         router.reset()
     except Exception as e:
diff --git a/tests/local_testing/test_router_init.py b/tests/local_testing/test_router_init.py
index 00b2daa764..dd2d43dc26 100644
--- a/tests/local_testing/test_router_init.py
+++ b/tests/local_testing/test_router_init.py
@@ -40,7 +40,7 @@
 #             {
 #                 "model_name": "gpt-3.5-turbo",
 #                 "litellm_params": {
-#                     "model": "azure/chatgpt-v-2",
+#                     "model": "azure/chatgpt-v-3",
 #                     "api_key": os.getenv("AZURE_API_KEY"),
 #                     "api_version": os.getenv("AZURE_API_VERSION"),
 #                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -96,7 +96,7 @@
 #             {
 #                 "model_name": "gpt-3.5-turbo",
 #                 "litellm_params": {
-#                     "model": "azure/chatgpt-v-2",
+#                     "model": "azure/chatgpt-v-3",
 #                     "api_key": os.getenv("AZURE_API_KEY"),
 #                     "api_version": os.getenv("AZURE_API_VERSION"),
 #                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -134,7 +134,7 @@
 #             {
 #                 "model_name": "azure-cloudflare",
 #                 "litellm_params": {
-#                     "model": "azure/chatgpt-v-2",
+#                     "model": "azure/chatgpt-v-3",
 #                     "api_key": os.getenv("AZURE_API_KEY"),
 #                     "api_version": os.getenv("AZURE_API_VERSION"),
 #                     "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1",
@@ -201,7 +201,7 @@
 #             {
 #                 "model_name": "gpt-3.5-turbo",
 #                 "litellm_params": {
-#                     "model": "azure/chatgpt-v-2",
+#                     "model": "azure/chatgpt-v-3",
 #                     "api_key": os.getenv("AZURE_API_KEY"),
 #                     "api_version": os.getenv("AZURE_API_VERSION"),
 #                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -254,7 +254,7 @@
 #             {
 #                 "model_name": "gpt-3.5-turbo",
 #                 "litellm_params": {
-#                     "model": "azure/chatgpt-v-2",
+#                     "model": "azure/chatgpt-v-3",
 #                     "api_key": os.getenv("AZURE_API_KEY"),
 #                     "api_version": os.getenv("AZURE_API_VERSION"),
 #                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -615,7 +615,7 @@
 #             {
 #                 "model_name": "gpt-3.5-turbo",
 #                 "litellm_params": {
-#                     "model": "azure/chatgpt-v-2",
+#                     "model": "azure/chatgpt-v-3",
 #                     "api_key": os.getenv("AZURE_API_KEY"),
 #                     "api_version": os.getenv("AZURE_API_VERSION"),
 #                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -660,7 +660,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_router_policy_violation.py b/tests/local_testing/test_router_policy_violation.py
index 52f50eb591..1e72868db6 100644
--- a/tests/local_testing/test_router_policy_violation.py
+++ b/tests/local_testing/test_router_policy_violation.py
@@ -69,7 +69,7 @@ async def test_async_fallbacks():
         {  # list of model deployments
             "model_name": "azure/gpt-3.5-turbo-context-fallback",  # openai model name
             "litellm_params": {  # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_router_retries.py b/tests/local_testing/test_router_retries.py
index 12bd71cfd1..d028010afa 100644
--- a/tests/local_testing/test_router_retries.py
+++ b/tests/local_testing/test_router_retries.py
@@ -166,7 +166,7 @@ async def test_router_retry_policy(error_type):
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -175,7 +175,7 @@ async def test_router_retry_policy(error_type):
             {
                 "model_name": "bad-model",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -275,7 +275,7 @@ async def test_dynamic_router_retry_policy(model_group):
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -287,7 +287,7 @@ async def test_dynamic_router_retry_policy(model_group):
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -299,7 +299,7 @@ async def test_dynamic_router_retry_policy(model_group):
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -311,7 +311,7 @@ async def test_dynamic_router_retry_policy(model_group):
             {
                 "model_name": "bad-model",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -393,7 +393,7 @@ def test_retry_rate_limit_error_with_healthy_deployments():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -426,7 +426,7 @@ def test_do_retry_rate_limit_error_with_no_fallbacks_and_no_healthy_deployments(
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -459,14 +459,14 @@ def test_raise_context_window_exceeded_error():
         llm_provider="azure",
         model="gpt-3.5-turbo",
     )
-    context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-2"]}]
+    context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-3"]}]
 
     router = Router(
         model_list=[
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -508,7 +508,7 @@ def test_raise_context_window_exceeded_error_no_retry():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -562,7 +562,7 @@ def test_timeout_for_rate_limit_error_with_healthy_deployments(
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -589,7 +589,7 @@ def test_timeout_for_rate_limit_error_with_healthy_deployments(
                 "litellm_params": {
                     "api_key": "my-key",
                     "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com",
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                 },
                 "model_info": {
                     "id": "0e30bc8a63fa91ae4415d4234e231b3f9e6dd900cac57d118ce13a720d95e9d6",
@@ -615,7 +615,7 @@ def test_timeout_for_rate_limit_error_with_no_healthy_deployments():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -650,7 +650,7 @@ def test_no_retry_for_not_found_error_404():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -709,7 +709,7 @@ def test_no_retry_when_no_healthy_deployments():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_router_timeout.py b/tests/local_testing/test_router_timeout.py
index 3f149a4342..c8d7502eee 100644
--- a/tests/local_testing/test_router_timeout.py
+++ b/tests/local_testing/test_router_timeout.py
@@ -30,7 +30,7 @@ def test_router_timeouts():
         {
             "model_name": "openai-gpt-4",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "os.environ/AZURE_API_KEY",
                 "api_base": "os.environ/AZURE_API_BASE",
                 "api_version": "os.environ/AZURE_API_VERSION",
diff --git a/tests/local_testing/test_router_utils.py b/tests/local_testing/test_router_utils.py
index 067aaf032a..cd26f8ad60 100644
--- a/tests/local_testing/test_router_utils.py
+++ b/tests/local_testing/test_router_utils.py
@@ -32,7 +32,7 @@ def test_returned_settings():
             {
                 "model_name": "gpt-3.5-turbo",  # openai model name
                 "litellm_params": {  # params for litellm completion/embedding call
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -96,7 +96,7 @@ def test_update_kwargs_before_fallbacks_unit_test():
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
@@ -133,7 +133,7 @@ async def test_update_kwargs_before_fallbacks(call_type):
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": "bad-key",
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
diff --git a/tests/local_testing/test_streaming.py b/tests/local_testing/test_streaming.py
index 1f0730ef29..78226d0eab 100644
--- a/tests/local_testing/test_streaming.py
+++ b/tests/local_testing/test_streaming.py
@@ -241,7 +241,7 @@ tools_schema = [
 def test_completion_azure_stream_special_char():
     litellm.set_verbose = True
     messages = [{"role": "user", "content": "hi. respond with the <xml> tag only"}]
-    response = completion(model="azure/chatgpt-v-2", messages=messages, stream=True)
+    response = completion(model="azure/chatgpt-v-3", messages=messages, stream=True)
     response_str = ""
     for part in response:
         response_str += part.choices[0].delta.content or ""
@@ -449,7 +449,7 @@ def test_completion_azure_stream():
             },
         ]
         response = completion(
-            model="azure/chatgpt-v-2", messages=messages, stream=True, max_tokens=50
+            model="azure/chatgpt-v-3", messages=messages, stream=True, max_tokens=50
         )
         complete_response = ""
         # Add any assertions here to check the response
@@ -2070,7 +2070,7 @@ def test_openai_chat_completion_complete_response_call():
     "model",
     [
         "gpt-3.5-turbo",
-        "azure/chatgpt-v-2",
+        "azure/chatgpt-v-3",
         "claude-3-haiku-20240307",
         "o1-preview",
         "o1",
diff --git a/tests/local_testing/test_timeout.py b/tests/local_testing/test_timeout.py
index b74cf89eaa..9342e789b4 100644
--- a/tests/local_testing/test_timeout.py
+++ b/tests/local_testing/test_timeout.py
@@ -23,7 +23,7 @@ import litellm
     [
         ("gpt-3.5-turbo", "openai"),
         ("anthropic.claude-instant-v1", "bedrock"),
-        ("azure/chatgpt-v-2", "azure"),
+        ("azure/chatgpt-v-3", "azure"),
     ],
 )
 @pytest.mark.parametrize("sync_mode", [True, False])
@@ -104,7 +104,7 @@ def test_hanging_request_azure():
                 {
                     "model_name": "azure-gpt",
                     "litellm_params": {
-                        "model": "azure/chatgpt-v-2",
+                        "model": "azure/chatgpt-v-3",
                         "api_base": os.environ["AZURE_API_BASE"],
                         "api_key": os.environ["AZURE_API_KEY"],
                     },
@@ -158,7 +158,7 @@ def test_hanging_request_openai():
                 {
                     "model_name": "azure-gpt",
                     "litellm_params": {
-                        "model": "azure/chatgpt-v-2",
+                        "model": "azure/chatgpt-v-3",
                         "api_base": os.environ["AZURE_API_BASE"],
                         "api_key": os.environ["AZURE_API_KEY"],
                     },
diff --git a/tests/local_testing/test_tpm_rpm_routing_v2.py b/tests/local_testing/test_tpm_rpm_routing_v2.py
index d2b951a187..57443bbe4c 100644
--- a/tests/local_testing/test_tpm_rpm_routing_v2.py
+++ b/tests/local_testing/test_tpm_rpm_routing_v2.py
@@ -45,7 +45,7 @@ def test_tpm_rpm_updated():
     )
     model_group = "gpt-3.5-turbo"
     deployment_id = "1234"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
     total_tokens = 50
     standard_logging_payload: StandardLoggingPayload = create_standard_logging_payload()
     standard_logging_payload["model_group"] = model_group
@@ -100,12 +100,12 @@ def test_get_available_deployments():
     model_list = [
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "1234"},
         },
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "5678"},
         },
     ]
@@ -116,7 +116,7 @@ def test_get_available_deployments():
     ## DEPLOYMENT 1 ##
     total_tokens = 50
     deployment_id = "1234"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
     standard_logging_payload = create_standard_logging_payload()
     standard_logging_payload["model_group"] = model_group
     standard_logging_payload["model_id"] = deployment_id
@@ -721,7 +721,7 @@ async def test_tpm_rpm_routing_model_name_checks():
     deployment = {
         "model_name": "gpt-3.5-turbo",
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": os.getenv("AZURE_API_KEY"),
             "api_base": os.getenv("AZURE_API_BASE"),
             "mock_response": "Hey, how's it going?",
@@ -763,5 +763,5 @@ async def test_tpm_rpm_routing_model_name_checks():
 
         assert (
             standard_logging_payload["hidden_params"]["litellm_model_name"]
-            == "azure/chatgpt-v-2"
+            == "azure/chatgpt-v-3"
         )
diff --git a/tests/logging_callback_tests/test_alerting.py b/tests/logging_callback_tests/test_alerting.py
index fc2eae00f7..26a5e0822f 100644
--- a/tests/logging_callback_tests/test_alerting.py
+++ b/tests/logging_callback_tests/test_alerting.py
@@ -56,7 +56,7 @@ def test_get_api_base_unit_test(model, optional_params, expected_api_base):
 async def test_get_api_base():
     _pl = ProxyLogging(user_api_key_cache=DualCache())
     _pl.update_values(alerting=["slack"], alerting_threshold=100, redis_cache=None)
-    model = "chatgpt-v-2"
+    model = "chatgpt-v-3"
     messages = [{"role": "user", "content": "Hey how's it going?"}]
     litellm_params = {
         "acompletion": True,
diff --git a/tests/logging_callback_tests/test_amazing_s3_logs.py b/tests/logging_callback_tests/test_amazing_s3_logs.py
index 17efb177d0..915041e714 100644
--- a/tests/logging_callback_tests/test_amazing_s3_logs.py
+++ b/tests/logging_callback_tests/test_amazing_s3_logs.py
@@ -244,7 +244,7 @@ async def make_async_calls():
     for _ in range(5):
         task = asyncio.create_task(
             litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=[{"role": "user", "content": "This is a test"}],
                 max_tokens=5,
                 temperature=0.7,
diff --git a/tests/logging_callback_tests/test_spend_logs.py b/tests/logging_callback_tests/test_spend_logs.py
index 972e636b48..d592931f25 100644
--- a/tests/logging_callback_tests/test_spend_logs.py
+++ b/tests/logging_callback_tests/test_spend_logs.py
@@ -40,7 +40,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
 
     input_args: dict = {
         "kwargs": {
-            "model": "chatgpt-v-2",
+            "model": "chatgpt-v-3",
             "messages": [
                 {"role": "system", "content": "you are a helpful assistant.\n"},
                 {"role": "user", "content": "bom dia"},
@@ -89,7 +89,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
                     },
                     "endpoint": "http://localhost:4000/chat/completions",
                     "model_group": "gpt-3.5-turbo",
-                    "deployment": "azure/chatgpt-v-2",
+                    "deployment": "azure/chatgpt-v-3",
                     "model_info": {
                         "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                         "db_model": False,
@@ -99,7 +99,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
                     "error_information": None,
                     "status": "success",
                     "proxy_server_request": "{}",
-                    "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+                    "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
                 },
                 "model_info": {
                     "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
@@ -158,7 +158,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
                 "api_base": "openai-gpt-4-test-v-1.openai.azure.com",
                 "acompletion": True,
                 "complete_input_dict": {
-                    "model": "chatgpt-v-2",
+                    "model": "chatgpt-v-3",
                     "messages": [
                         {"role": "system", "content": "you are a helpful assistant.\n"},
                         {"role": "user", "content": "bom dia"},
diff --git a/tests/old_proxy_tests/tests/load_test_q.py b/tests/old_proxy_tests/tests/load_test_q.py
index 17fa185215..a0e22eda5a 100644
--- a/tests/old_proxy_tests/tests/load_test_q.py
+++ b/tests/old_proxy_tests/tests/load_test_q.py
@@ -25,7 +25,7 @@ config = {
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.environ["AZURE_API_KEY"],
                 "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                 "api_version": "2023-07-01-preview",
diff --git a/tests/old_proxy_tests/tests/test_langchain_request.py b/tests/old_proxy_tests/tests/test_langchain_request.py
index e94a077cc8..901edd783a 100644
--- a/tests/old_proxy_tests/tests/test_langchain_request.py
+++ b/tests/old_proxy_tests/tests/test_langchain_request.py
@@ -9,7 +9,7 @@
 
 # chat = ChatOpenAI(
 #     openai_api_base="http://0.0.0.0:8000",
-#     model = "azure/chatgpt-v-2",
+#     model = "azure/chatgpt-v-3",
 #     temperature=0.1,
 #     extra_body={
 #         "metadata": {
diff --git a/tests/old_proxy_tests/tests/test_openai_exception_request.py b/tests/old_proxy_tests/tests/test_openai_exception_request.py
index 46090e1c89..68b8997766 100644
--- a/tests/old_proxy_tests/tests/test_openai_exception_request.py
+++ b/tests/old_proxy_tests/tests/test_openai_exception_request.py
@@ -39,7 +39,7 @@ client = openai.AzureOpenAI(
 )
 try:
     response = client.chat.completions.create(
-        model="chatgpt-v-2",
+        model="chatgpt-v-3",
         messages=[
             {
                 "role": "user",
diff --git a/tests/old_proxy_tests/tests/test_openai_request.py b/tests/old_proxy_tests/tests/test_openai_request.py
index bb7bf22687..41b8c43f2d 100644
--- a/tests/old_proxy_tests/tests/test_openai_request.py
+++ b/tests/old_proxy_tests/tests/test_openai_request.py
@@ -4,7 +4,7 @@ client = openai.OpenAI(api_key="hi", base_url="http://0.0.0.0:8000")
 
 # # request sent to model set on litellm proxy, `litellm --model`
 response = client.chat.completions.create(
-    model="azure/chatgpt-v-2",
+    model="azure/chatgpt-v-3",
     messages=[
         {"role": "user", "content": "this is a test request, write a short poem"}
     ],
diff --git a/tests/proxy_unit_tests/example_config_yaml/azure_config.yaml b/tests/proxy_unit_tests/example_config_yaml/azure_config.yaml
index fd5865cd7c..111813c884 100644
--- a/tests/proxy_unit_tests/example_config_yaml/azure_config.yaml
+++ b/tests/proxy_unit_tests/example_config_yaml/azure_config.yaml
@@ -1,7 +1,7 @@
 model_list:
   - model_name: gpt-4-team1
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
       api_version: "2023-05-15"
       api_key:  os.environ/AZURE_API_KEY
diff --git a/tests/proxy_unit_tests/test_configs/test_bad_config.yaml b/tests/proxy_unit_tests/test_configs/test_bad_config.yaml
index 7c802a8408..0a16ecb3c5 100644
--- a/tests/proxy_unit_tests/test_configs/test_bad_config.yaml
+++ b/tests/proxy_unit_tests/test_configs/test_bad_config.yaml
@@ -5,12 +5,12 @@ model_list:
       model: gpt-3.5-turbo
   - model_name: working-azure-gpt-3.5-turbo
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
   - model_name: azure-gpt-3.5-turbo
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: bad-key
   - model_name: azure-embedding
diff --git a/tests/proxy_unit_tests/test_configs/test_cloudflare_azure_with_cache_config.yaml b/tests/proxy_unit_tests/test_configs/test_cloudflare_azure_with_cache_config.yaml
index c3c3cb1c32..aeadbeb872 100644
--- a/tests/proxy_unit_tests/test_configs/test_cloudflare_azure_with_cache_config.yaml
+++ b/tests/proxy_unit_tests/test_configs/test_cloudflare_azure_with_cache_config.yaml
@@ -1,7 +1,7 @@
 model_list:
   - model_name: azure-cloudflare
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
       api_key: os.environ/AZURE_API_KEY
       api_version: 2023-07-01-preview
diff --git a/tests/proxy_unit_tests/test_configs/test_config_no_auth.yaml b/tests/proxy_unit_tests/test_configs/test_config_no_auth.yaml
index 1c5ddf2266..075bf7a09d 100644
--- a/tests/proxy_unit_tests/test_configs/test_config_no_auth.yaml
+++ b/tests/proxy_unit_tests/test_configs/test_config_no_auth.yaml
@@ -12,7 +12,7 @@ model_list:
 - litellm_params:
     api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
     api_key: os.environ/AZURE_API_KEY
-    model: azure/chatgpt-v-2
+    model: azure/chatgpt-v-3
   model_name: azure-cloudflare-model
 - litellm_params:
     api_base: https://openai-france-1234.openai.azure.com
diff --git a/tests/proxy_unit_tests/test_configs/test_custom_logger.yaml b/tests/proxy_unit_tests/test_configs/test_custom_logger.yaml
index 145c618edd..2ad500b36f 100644
--- a/tests/proxy_unit_tests/test_configs/test_custom_logger.yaml
+++ b/tests/proxy_unit_tests/test_configs/test_custom_logger.yaml
@@ -1,7 +1,7 @@
 model_list: 
   - model_name: Azure OpenAI GPT-4 Canada
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
       api_version: "2023-07-01-preview"
diff --git a/tests/proxy_unit_tests/test_key_generate_prisma.py b/tests/proxy_unit_tests/test_key_generate_prisma.py
index d904de13b4..98cccccc79 100644
--- a/tests/proxy_unit_tests/test_key_generate_prisma.py
+++ b/tests/proxy_unit_tests/test_key_generate_prisma.py
@@ -1546,7 +1546,7 @@ def test_call_with_key_over_budget(prisma_client):
             )
             await proxy_db_logger._PROXY_track_cost_callback(
                 kwargs={
-                    "model": "chatgpt-v-2",
+                    "model": "chatgpt-v-3",
                     "stream": False,
                     "litellm_params": {
                         "metadata": {
@@ -1578,7 +1578,7 @@ def test_call_with_key_over_budget(prisma_client):
 
             assert spend_log.request_id == request_id
             assert spend_log.spend == float("2e-05")
-            assert spend_log.model == "chatgpt-v-2"
+            assert spend_log.model == "chatgpt-v-3"
             assert (
                 spend_log.cache_key
                 == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"
@@ -1669,7 +1669,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
             proxy_db_logger = _ProxyDBLogger()
             await proxy_db_logger._PROXY_track_cost_callback(
                 kwargs={
-                    "model": "chatgpt-v-2",
+                    "model": "chatgpt-v-3",
                     "stream": False,
                     "litellm_params": {
                         "metadata": {
@@ -1702,7 +1702,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
 
             assert spend_log.request_id == request_id
             assert spend_log.spend == float("2e-05")
-            assert spend_log.model == "chatgpt-v-2"
+            assert spend_log.model == "chatgpt-v-3"
             assert (
                 spend_log.cache_key
                 == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"
@@ -1757,7 +1757,7 @@ async def test_call_with_key_over_model_budget(
 
     try:
 
-        # set budget for chatgpt-v-2 to 0.000001, expect the next request to fail
+        # set budget for chatgpt-v-3 to 0.000001, expect the next request to fail
         model_max_budget = {
             "gpt-4o-mini": {
                 "budget_limit": "0.000001",
@@ -1898,7 +1898,7 @@ async def test_call_with_key_never_over_budget(prisma_client):
         )
         await proxy_db_logger._PROXY_track_cost_callback(
             kwargs={
-                "model": "chatgpt-v-2",
+                "model": "chatgpt-v-3",
                 "stream": False,
                 "litellm_params": {
                     "metadata": {
@@ -1987,7 +1987,7 @@ async def test_call_with_key_over_budget_stream(prisma_client):
         await proxy_db_logger._PROXY_track_cost_callback(
             kwargs={
                 "call_type": "acompletion",
-                "model": "sagemaker-chatgpt-v-2",
+                "model": "sagemaker-chatgpt-v-3",
                 "stream": True,
                 "complete_streaming_response": resp,
                 "litellm_params": {
@@ -2431,7 +2431,7 @@ async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
     await proxy_db_logger._PROXY_track_cost_callback(
         kwargs={
             "call_type": "acompletion",
-            "model": "sagemaker-chatgpt-v-2",
+            "model": "sagemaker-chatgpt-v-3",
             "stream": True,
             "complete_streaming_response": resp,
             "litellm_params": {
diff --git a/tests/proxy_unit_tests/test_proxy_custom_logger.py b/tests/proxy_unit_tests/test_proxy_custom_logger.py
index ad60335152..bdad7c9d7d 100644
--- a/tests/proxy_unit_tests/test_proxy_custom_logger.py
+++ b/tests/proxy_unit_tests/test_proxy_custom_logger.py
@@ -164,7 +164,7 @@ def test_chat_completion(client):
             my_custom_logger.async_success == True
         )  # checks if the status of async_success is True, only the async_log_success_event can set this to true
         assert (
-            my_custom_logger.async_completion_kwargs["model"] == "chatgpt-v-2"
+            my_custom_logger.async_completion_kwargs["model"] == "chatgpt-v-3"
         )  # checks if kwargs passed to async_log_success_event are correct
         print(
             "\n\n Custom Logger Async Completion args",
diff --git a/tests/proxy_unit_tests/test_proxy_pass_user_config.py b/tests/proxy_unit_tests/test_proxy_pass_user_config.py
index 12def1160f..3ecc252264 100644
--- a/tests/proxy_unit_tests/test_proxy_pass_user_config.py
+++ b/tests/proxy_unit_tests/test_proxy_pass_user_config.py
@@ -64,7 +64,7 @@ def test_chat_completion(client_no_auth):
             ModelConfig(
                 model_name="user-azure-instance",
                 litellm_params=CompletionRequest(
-                    model="azure/chatgpt-v-2",
+                    model="azure/chatgpt-v-3",
                     api_key=os.getenv("AZURE_API_KEY"),
                     api_version=os.getenv("AZURE_API_VERSION"),
                     api_base=os.getenv("AZURE_API_BASE"),
diff --git a/tests/proxy_unit_tests/test_proxy_server.py b/tests/proxy_unit_tests/test_proxy_server.py
index 68f4ff8ec4..dda39d2bd5 100644
--- a/tests/proxy_unit_tests/test_proxy_server.py
+++ b/tests/proxy_unit_tests/test_proxy_server.py
@@ -446,7 +446,7 @@ def test_chat_completion_azure(mock_acompletion, client_no_auth):
     try:
         # Your test data
         test_data = {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [
                 {"role": "user", "content": "write 1 sentence poem"},
             ],
@@ -457,7 +457,7 @@ def test_chat_completion_azure(mock_acompletion, client_no_auth):
         response = client_no_auth.post("/v1/chat/completions", json=test_data)
 
         mock_acompletion.assert_called_once_with(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[
                 {"role": "user", "content": "write 1 sentence poem"},
             ],
@@ -489,19 +489,19 @@ def test_openai_deployments_model_chat_completions_azure(
     try:
         # Your test data
         test_data = {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [
                 {"role": "user", "content": "write 1 sentence poem"},
             ],
             "max_tokens": 10,
         }
 
-        url = "/openai/deployments/azure/chatgpt-v-2/chat/completions"
+        url = "/openai/deployments/azure/chatgpt-v-3/chat/completions"
         print(f"testing proxy server with Azure Request {url}")
         response = client_no_auth.post(url, json=test_data)
 
         mock_acompletion.assert_called_once_with(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[
                 {"role": "user", "content": "write 1 sentence poem"},
             ],
@@ -1314,7 +1314,7 @@ async def test_add_callback_via_key(prisma_client):
     try:
         # Your test data
         test_data = {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [
                 {"role": "user", "content": "write 1 sentence poem"},
             ],
@@ -1408,7 +1408,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(
     request._url = URL(url="/chat/completions")
 
     test_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [
             {"role": "user", "content": "write 1 sentence poem"},
         ],
@@ -1423,7 +1423,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(
 
     data = {
         "data": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [{"role": "user", "content": "write 1 sentence poem"}],
             "max_tokens": 10,
             "mock_response": "Hello world",
@@ -1523,7 +1523,7 @@ async def test_disable_fallbacks_by_key(disable_fallbacks_set):
 
     key_metadata = {"disable_fallbacks": disable_fallbacks_set}
     existing_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [{"role": "user", "content": "write 1 sentence poem"}],
     }
     data = LiteLLMProxyRequestSetup.add_key_level_controls(
@@ -1564,7 +1564,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket(
     request._url = URL(url="/chat/completions")
 
     test_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [
             {"role": "user", "content": "write 1 sentence poem"},
         ],
@@ -1579,7 +1579,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket(
 
     data = {
         "data": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [{"role": "user", "content": "write 1 sentence poem"}],
             "max_tokens": 10,
             "mock_response": "Hello world",
@@ -1697,7 +1697,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith(
     request._url = URL(url="/chat/completions")
 
     test_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [
             {"role": "user", "content": "write 1 sentence poem"},
         ],
@@ -1712,7 +1712,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith(
 
     data = {
         "data": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [{"role": "user", "content": "write 1 sentence poem"}],
             "max_tokens": 10,
             "mock_response": "Hello world",
diff --git a/tests/proxy_unit_tests/test_proxy_server_keys.py b/tests/proxy_unit_tests/test_proxy_server_keys.py
index 6eb41202cd..8b8e943ba7 100644
--- a/tests/proxy_unit_tests/test_proxy_server_keys.py
+++ b/tests/proxy_unit_tests/test_proxy_server_keys.py
@@ -171,7 +171,7 @@
 #         model_data = {
 #             "model_name": "azure-model",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),
 #                 "api_version": os.getenv("AZURE_API_VERSION")
diff --git a/tests/test_models.py b/tests/test_models.py
index 31e564a829..89944c07b3 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -67,7 +67,7 @@ async def add_models(session, model_id="123", model_name="azure-gpt-3.5", key="s
     data = {
         "model_name": model_name,
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": "os.environ/AZURE_API_KEY",
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "api_version": "2023-05-15",
@@ -100,7 +100,7 @@ async def update_model(session, model_id="123", model_name="azure-gpt-3.5", key=
     data = {
         "model_name": model_name,
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": "os.environ/AZURE_API_KEY",
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "api_version": "2023-05-15",
@@ -292,7 +292,7 @@ async def add_model_for_health_checking(session, model_id="123"):
     data = {
         "model_name": f"azure-model-health-check-{model_id}",
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": os.getenv("AZURE_API_KEY"),
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "api_version": "2023-05-15",
@@ -417,7 +417,7 @@ async def test_add_model_run_health():
 
         assert _health_info["healthy_count"] == 1
         assert (
-            _healthy_endpooint["model"] == "azure/chatgpt-v-2"
+            _healthy_endpooint["model"] == "azure/chatgpt-v-3"
         )  # this is the model that got added
 
         # assert httpx client is is unchanges

From 3e87ec4f16e8b2d722b3e0cae5ef77812dd63fe7 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Fri, 18 Apr 2025 14:23:16 -0700
Subject: [PATCH 5/5] test: replace removed fireworks ai models

---
 tests/local_testing/test_completion_cost.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py
index 3e30041489..3f2be26036 100644
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@@ -1284,7 +1284,7 @@ from litellm.llms.fireworks_ai.cost_calculator import get_base_model_for_pricing
     "model, base_model",
     [
         ("fireworks_ai/llama-v3p1-405b-instruct", "fireworks-ai-default"),
-        ("fireworks_ai/mixtral-8x7b-instruct", "fireworks-ai-moe-up-to-56b"),
+        ("fireworks_ai/llama4-maverick-instruct-basic", "fireworks-ai-moe-up-to-56b"),
     ],
 )
 def test_get_model_params_fireworks_ai(model, base_model):
@@ -1294,7 +1294,7 @@ def test_get_model_params_fireworks_ai(model, base_model):
 
 @pytest.mark.parametrize(
     "model",
-    ["fireworks_ai/llama-v3p1-405b-instruct", "fireworks_ai/mixtral-8x7b-instruct"],
+    ["fireworks_ai/llama-v3p1-405b-instruct", "fireworks_ai/llama4-maverick-instruct-basic"],
 )
 def test_completion_cost_fireworks_ai(model):
     os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"