diff --git a/litellm/__init__.py b/litellm/__init__.py
index 0fa822a98..a9e6b69ae 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -125,6 +125,9 @@ llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
 ##################
 ### PREVIEW FEATURES ###
 enable_preview_features: bool = False
+return_response_headers: bool = (
+    False  # get response headers from LLM API providers - example: x-ratelimit-remaining-requests
+)
 ##################
 logging: bool = True
 caching: bool = (
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 32e63b957..d98dd9d8c 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -652,6 +652,31 @@ class OpenAIChatCompletion(BaseLLM):
         else:
             return client
 
+    async def make_openai_chat_completion_request(
+        self,
+        openai_aclient: AsyncOpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    await openai_aclient.chat.completions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )
+
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = await openai_aclient.chat.completions.create(
+                    **data, timeout=timeout
+                )
+                return None, response
+        except Exception as e:
+            raise e
+
     def completion(
         self,
         model_response: ModelResponse,
@@ -869,8 +894,8 @@ class OpenAIChatCompletion(BaseLLM):
                 },
             )
 
-            response = await openai_aclient.chat.completions.create(
-                **data, timeout=timeout
+            headers, response = await self.make_openai_chat_completion_request(
+                openai_aclient=openai_aclient, data=data, timeout=timeout
             )
             stringified_response = response.model_dump()
             logging_obj.post_call(
@@ -965,8 +990,8 @@ class OpenAIChatCompletion(BaseLLM):
                 },
             )
 
-            response = await openai_aclient.chat.completions.create(
-                **data, timeout=timeout
+            headers, response = await self.make_openai_chat_completion_request(
+                openai_aclient=openai_aclient, data=data, timeout=timeout
            )
             streamwrapper = CustomStreamWrapper(
                 completion_stream=response,
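
For reviewers, a minimal sketch of how the new flag and helper could be exercised end to end. This is not part of the diff: the no-arg `OpenAIChatCompletion()` construction, the `gpt-3.5-turbo` payload, and the header name printed are assumptions for illustration, and a valid `OPENAI_API_KEY` must be set in the environment.

```python
# Hypothetical usage sketch (not part of this PR) - assumes litellm's
# internal OpenAIChatCompletion handler is importable as shown and that
# OPENAI_API_KEY is set in the environment.
import asyncio

import litellm
from litellm.llms.openai import OpenAIChatCompletion
from openai import AsyncOpenAI


async def main():
    # Opt in to the new preview flag added in litellm/__init__.py.
    litellm.return_response_headers = True

    handler = OpenAIChatCompletion()
    client = AsyncOpenAI()  # picks up OPENAI_API_KEY

    # The helper returns (headers, response); headers is None when the
    # flag is False, otherwise a plain dict of the raw HTTP headers.
    headers, response = await handler.make_openai_chat_completion_request(
        openai_aclient=client,
        data={
            "model": "gpt-3.5-turbo",  # assumed model, for illustration only
            "messages": [{"role": "user", "content": "Hello"}],
        },
        timeout=600.0,
    )

    if headers is not None:
        # Rate-limit headers such as x-ratelimit-remaining-requests are
        # now visible to the caller.
        print(headers.get("x-ratelimit-remaining-requests"))
    print(response.choices[0].message.content)


asyncio.run(main())
```

Note the design choice visible in the diff: routing through `with_raw_response` only when the flag is set keeps the default path untouched, so callers that leave `return_response_headers` at `False` still go through the original `chat.completions.create` call and simply receive `None` for the headers.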