diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index c7613017e..98cc97d53 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -724,16 +724,32 @@ class AzureChatCompletion(BaseLLM):
         client_session = litellm.aclient_session or httpx.AsyncClient(
             transport=AsyncCustomHTTPTransport(),  # handle dall-e-2 calls
         )
-        client = AsyncAzureOpenAI(
-            api_version=api_version,
-            azure_endpoint=api_base,
-            api_key=api_key,
-            timeout=timeout,
-            http_client=client_session,
-        )
+        if "gateway.ai.cloudflare.com" in api_base:
+            ## build base url - assumes api_base already includes the resource name; the model name is appended as the final path segment
+            if not api_base.endswith("/"):
+                api_base += "/"
+            api_base += f"{model}"
+            client = AsyncAzureOpenAI(
+                base_url=api_base,
+                api_version=api_version,
+                api_key=api_key,
+                timeout=timeout,
+                http_client=client_session,
+            )
+            model = None
+            # cloudflare ai gateway: the model name is already part of base_url, so model is reset to None for this path
+        else:
+            client = AsyncAzureOpenAI(
+                api_version=api_version,
+                azure_endpoint=api_base,
+                api_key=api_key,
+                timeout=timeout,
+                http_client=client_session,
+            )
 
-        if model is None and mode != "image_generation":
-            raise Exception("model is not set")
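+            # only run this check if it's not cloudflare ai gateway (NOTE(review): the cloudflare branch does not validate model before putting it in the url - confirm this is intended)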
+            if model is None and mode != "image_generation":
+                raise Exception("model is not set")
 
         completion = None
 
diff --git a/litellm/tests/test_configs/test_config_no_auth.yaml b/litellm/tests/test_configs/test_config_no_auth.yaml
index e3bf91456..be85765a8 100644
--- a/litellm/tests/test_configs/test_config_no_auth.yaml
+++ b/litellm/tests/test_configs/test_config_no_auth.yaml
@@ -9,6 +9,11 @@ model_list:
     api_key: os.environ/AZURE_CANADA_API_KEY
     model: azure/gpt-35-turbo
   model_name: azure-model
+- litellm_params:
+    api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
+    api_key: os.environ/AZURE_API_KEY
+    model: azure/chatgpt-v-2
+  model_name: azure-cloudflare-model
 - litellm_params:
     api_base: https://openai-france-1234.openai.azure.com
     api_key: os.environ/AZURE_FRANCE_API_KEY
diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py
index ac4ebb585..294a5a096 100644
--- a/litellm/tests/test_proxy_server.py
+++ b/litellm/tests/test_proxy_server.py
@@ -45,7 +45,7 @@ def client_no_auth():
     filepath = os.path.dirname(os.path.abspath(__file__))
     config_fp = f"{filepath}/test_configs/test_config_no_auth.yaml"
     # initialize can get run in parallel, it sets specific variables for the fast api app, sinc eit gets run in parallel different tests use the wrong variables
-    initialize(config=config_fp)
+    initialize(config=config_fp, debug=True)
     app = FastAPI()
     app.include_router(router)  # Include your router in the test app