feat - return response headers for async openai requests

2024-07-01 17:01:42 -07:00 · 2024-07-01 17:01:42 -07:00 · 4b7feb3261
commit 4b7feb3261
parent 4c95782f74
2 changed files with 32 additions and 4 deletions
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@ -125,6 +125,9 @@ llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
 ##################
 ### PREVIEW FEATURES ###
 enable_preview_features: bool = False
 return_response_headers: bool = (
    False  # get response headers from LLM API providers, e.g. x-remaining-requests
 )
 ##################
 logging: bool = True
 caching: bool = (
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@ -652,6 +652,31 @@ class OpenAIChatCompletion(BaseLLM):
        else:
            return client
    async def make_openai_chat_completion_request(
        self,
        openai_aclient: AsyncOpenAI,
        data: dict,
        timeout: Union[float, httpx.Timeout],
    ):
        """
        Send an async chat completion request, optionally capturing response headers.

        Args:
            openai_aclient: async OpenAI client used to issue the request.
            data: keyword arguments forwarded unchanged to
                `chat.completions.create`.
            timeout: request timeout forwarded to the client call.

        Returns:
            A `(headers, response)` tuple. `headers` is a plain dict built from
            the raw response headers when `litellm.return_response_headers` is
            True, otherwise None. `response` is the result of the create call
            (parsed from the raw response when headers are requested).

        Raises:
            Any exception raised by the client call propagates to the caller
            unchanged; nothing is caught or wrapped here.
        """
        if litellm.return_response_headers is not True:
            response = await openai_aclient.chat.completions.create(
                **data, timeout=timeout
            )
            return None, response

        # `with_raw_response` hands back the HTTP-level response so the headers
        # can be read before the body is parsed into the usual response object.
        raw_response = await openai_aclient.chat.completions.with_raw_response.create(
            **data, timeout=timeout
        )
        headers = dict(raw_response.headers)
        response = raw_response.parse()
        return headers, response
    def completion(
        self,
        model_response: ModelResponse,
@ -869,8 +894,8 @@ class OpenAIChatCompletion(BaseLLM):
                },
            )
-            response = await openai_aclient.chat.completions.create(
+            headers, response = await self.make_openai_chat_completion_request(
-                **data, timeout=timeout
+                openai_aclient=openai_aclient, data=data, timeout=timeout
            )
            stringified_response = response.model_dump()
            logging_obj.post_call(
@ -965,8 +990,8 @@ class OpenAIChatCompletion(BaseLLM):
                },
            )
-            response = await openai_aclient.chat.completions.create(
+            headers, response = await self.make_openai_chat_completion_request(
-                **data, timeout=timeout
+                openai_aclient=openai_aclient, data=data, timeout=timeout
            )
            streamwrapper = CustomStreamWrapper(
                completion_stream=response,