diff --git a/litellm/__init__.py b/litellm/__init__.py
index 0fa822a98..a9e6b69ae 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -125,6 +125,9 @@ llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
 ##################
 ### PREVIEW FEATURES ###
 enable_preview_features: bool = False
+return_response_headers: bool = (
+    False  # get response headers from LLM API providers - example: x-ratelimit-remaining-requests
+)
 ##################
 logging: bool = True
 caching: bool = (
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 32e63b957..d98dd9d8c 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -652,6 +652,31 @@ class OpenAIChatCompletion(BaseLLM):
         else:
             return client
 
+    async def make_openai_chat_completion_request(
+        self,
+        openai_aclient: AsyncOpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    await openai_aclient.chat.completions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )
+
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = await openai_aclient.chat.completions.create(
+                    **data, timeout=timeout
+                )
+                return None, response
+        except Exception as e:
+            raise e
+
     def completion(
         self,
         model_response: ModelResponse,
@@ -869,8 +894,8 @@ class OpenAIChatCompletion(BaseLLM):
                 },
             )
 
-            response = await openai_aclient.chat.completions.create(
-                **data, timeout=timeout
+            headers, response = await self.make_openai_chat_completion_request(
+                openai_aclient=openai_aclient, data=data, timeout=timeout
             )
             stringified_response = response.model_dump()
             logging_obj.post_call(
@@ -965,8 +990,8 @@ class OpenAIChatCompletion(BaseLLM):
                 },
             )
 
-            response = await openai_aclient.chat.completions.create(
-                **data, timeout=timeout
+            headers, response = await self.make_openai_chat_completion_request(
+                openai_aclient=openai_aclient, data=data, timeout=timeout
            )
             streamwrapper = CustomStreamWrapper(
                 completion_stream=response,
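
For reviewers, a minimal sketch of how the new flag and helper could be exercised end to end. This is not part of the diff: the no-arg `OpenAIChatCompletion()` construction, the `gpt-3.5-turbo` payload, and the header name printed are assumptions for illustration, and a valid `OPENAI_API_KEY` must be set in the environment.

```python
# Hypothetical usage sketch (not part of this PR) - assumes litellm's
# internal OpenAIChatCompletion handler is importable as shown and that
# OPENAI_API_KEY is set in the environment.
import asyncio

import litellm
from litellm.llms.openai import OpenAIChatCompletion
from openai import AsyncOpenAI


async def main():
    # Opt in to the new preview flag added in litellm/__init__.py.
    litellm.return_response_headers = True

    handler = OpenAIChatCompletion()
    client = AsyncOpenAI()  # picks up OPENAI_API_KEY

    # The helper returns (headers, response); headers is None when the
    # flag is False, otherwise a plain dict of the raw HTTP headers.
    headers, response = await handler.make_openai_chat_completion_request(
        openai_aclient=client,
        data={
            "model": "gpt-3.5-turbo",  # assumed model, for illustration only
            "messages": [{"role": "user", "content": "Hello"}],
        },
        timeout=600.0,
    )

    if headers is not None:
        # Rate-limit headers such as x-ratelimit-remaining-requests are
        # now visible to the caller.
        print(headers.get("x-ratelimit-remaining-requests"))
    print(response.choices[0].message.content)


asyncio.run(main())
```

Note the design choice visible in the diff: routing through `with_raw_response` only when the flag is set keeps the default path untouched, so callers that leave `return_response_headers` at `False` still go through the original `chat.completions.create` call and simply receive `None` for the headers.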