diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index e127ecea6..4b36edff2 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -458,6 +458,36 @@ class AzureChatCompletion(BaseLLM):
 
         return azure_client
 
+    async def make_azure_openai_chat_completion_request(
+        self,
+        azure_client: AsyncAzureOpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call chat.completions.with_raw_response.create when litellm.return_response_headers is True
+        - call chat.completions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    await azure_client.chat.completions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )
+
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = await azure_client.chat.completions.create(
+                    **data, timeout=timeout
+                )
+                return None, response
+        except Exception as e:
+            raise e
+
     def completion(
         self,
         model: str,
@@ -701,8 +731,11 @@ class AzureChatCompletion(BaseLLM):
                     "complete_input_dict": data,
                 },
             )
-            response = await azure_client.chat.completions.create(
-                **data, timeout=timeout
+
+            headers, response = await self.make_azure_openai_chat_completion_request(
+                azure_client=azure_client,
+                data=data,
+                timeout=timeout,
             )
 
             stringified_response = response.model_dump()
@@ -861,9 +894,13 @@ class AzureChatCompletion(BaseLLM):
                     "complete_input_dict": data,
                 },
             )
-            response = await azure_client.chat.completions.create(
-                **data, timeout=timeout
+
+            headers, response = await self.make_azure_openai_chat_completion_request(
+                azure_client=azure_client,
+                data=data,
+                timeout=timeout,
             )
+
             # return response
             streamwrapper = CustomStreamWrapper(
                 completion_stream=response,
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index d98dd9d8c..c7bb0e353 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -658,6 +658,11 @@ class OpenAIChatCompletion(BaseLLM):
         data: dict,
         timeout: Union[float, httpx.Timeout],
     ):
+        """
+        Helper to:
+        - call chat.completions.with_raw_response.create when litellm.return_response_headers is True
+        - call chat.completions.create by default
+        """
         try:
             if litellm.return_response_headers is True:
                 raw_response = (
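
Note on the raw-response pattern the new helper relies on, as a minimal,
self-contained sketch; the endpoint, API key, deployment name, and header
names below are illustrative placeholders, not values from this patch.
In the openai v1 SDK, with_raw_response issues the same request but returns
the raw HTTP response, whose .headers attribute carries headers such as
rate-limit counters, and whose .parse() yields the usual typed completion
object:

    import asyncio

    from openai import AsyncAzureOpenAI

    async def main() -> None:
        # Placeholder credentials and deployment, for illustration only.
        client = AsyncAzureOpenAI(
            azure_endpoint="https://example-resource.openai.azure.com",
            api_key="<api-key>",
            api_version="2024-02-01",
        )
        # Same request as chat.completions.create, but the raw HTTP
        # response (and therefore its headers) stays accessible.
        raw = await client.chat.completions.with_raw_response.create(
            model="<deployment-name>",
            messages=[{"role": "user", "content": "ping"}],
        )
        headers = dict(raw.headers)  # e.g. x-ratelimit-remaining-requests
        completion = raw.parse()     # the usual ChatCompletion object
        print(headers.get("x-ratelimit-remaining-requests"))
        print(completion.choices[0].message.content)

    asyncio.run(main())

Returning a (headers, response) tuple keeps existing call sites backward
compatible: headers is None unless litellm.return_response_headers is set,
so callers that ignore it behave exactly as before.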