forked from phoenix/litellm-mirror
openai return response headers
parent 725cd91064
commit 64dbe07593

1 changed file with 106 additions and 6 deletions
@@ -784,6 +784,34 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             raise e

+    def make_sync_openai_chat_completion_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call chat.completions.with_raw_response.create when litellm.return_response_headers is True
+        - call chat.completions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = openai_client.chat.completions.with_raw_response.create(
+                    **data, timeout=timeout
+                )
+
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.chat.completions.create(
+                    **data, timeout=timeout
+                )
+                return None, response
+        except Exception as e:
+            raise e
+
     def completion(
         self,
         model_response: ModelResponse,
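For reference, the raw-response path the new helper takes is the openai v1 SDK's with_raw_response wrapper, which exposes the HTTP headers alongside the parsed body. A minimal standalone sketch, assuming a configured OPENAI_API_KEY; the model name and message are placeholders, not values from this commit:

    import litellm
    from openai import OpenAI

    litellm.return_response_headers = True  # the flag this commit checks

    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "ping"}],
    }

    # with_raw_response keeps the HTTP layer instead of discarding it
    raw_response = client.chat.completions.with_raw_response.create(**data)
    headers = dict(raw_response.headers)  # e.g. OpenAI's x-ratelimit-* headers
    response = raw_response.parse()       # the usual ChatCompletion object

    print(headers.get("x-ratelimit-remaining-requests"))
    print(response.choices[0].message.content)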
@@ -913,7 +941,15 @@ class OpenAIChatCompletion(BaseLLM):
                 },
             )

-            response = openai_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
+            headers, response = (
+                self.make_sync_openai_chat_completion_request(
+                    openai_client=openai_client,
+                    data=data,
+                    timeout=timeout,
+                )
+            )
+
+            logging_obj.model_call_details["response_headers"] = headers
             stringified_response = response.model_dump()
             logging_obj.post_call(
                 input=messages,
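The captured headers are stored on logging_obj.model_call_details under the "response_headers" key. A hypothetical consumer sketch, assuming model_call_details is what litellm hands to custom success callbacks as kwargs (the callback plumbing is not part of this diff):

    # Hypothetical: reads the key this hunk sets; the kwargs plumbing is assumed.
    def log_rate_limits(kwargs, completion_response, start_time, end_time):
        headers = kwargs.get("response_headers") or {}
        print("remaining requests:", headers.get("x-ratelimit-remaining-requests"))
        print("remaining tokens:", headers.get("x-ratelimit-remaining-tokens"))

    # registration, per litellm's custom-callback convention:
    # litellm.success_callback = [log_rate_limits]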
@@ -1059,7 +1095,13 @@ class OpenAIChatCompletion(BaseLLM):
                     "complete_input_dict": data,
                 },
             )
-            response = openai_client.chat.completions.create(**data, timeout=timeout)
+            headers, response = self.make_sync_openai_chat_completion_request(
+                openai_client=openai_client,
+                data=data,
+                timeout=timeout,
+            )
+
+            logging_obj.model_call_details["response_headers"] = headers
             streamwrapper = CustomStreamWrapper(
                 completion_stream=response,
                 model=model,
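This second call site is the streaming path: with stream=True in data, raw_response.parse() yields a chunk stream rather than a ChatCompletion, so the headers are available before the first chunk is consumed and CustomStreamWrapper wraps the parsed stream as before. A sketch under the same placeholder assumptions as above:

    raw_response = client.chat.completions.with_raw_response.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "ping"}],
        stream=True,
    )
    headers = dict(raw_response.headers)  # captured before any chunk arrives
    stream = raw_response.parse()         # Stream[ChatCompletionChunk]
    for chunk in stream:
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")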
@@ -1159,6 +1201,31 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             raise e

+    def make_sync_openai_embedding_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call embeddings.with_raw_response.create when litellm.return_response_headers is True
+        - call embeddings.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = openai_client.embeddings.with_raw_response.create(
+                    **data, timeout=timeout
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.embeddings.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
     async def aembedding(
         self,
         input: list,
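The embeddings helper mirrors the chat one. A standalone sketch with a placeholder model and input, assuming a configured key and the client from the first sketch:

    raw_response = client.embeddings.with_raw_response.create(
        model="text-embedding-ada-002",
        input=["hello world"],
    )
    headers = dict(raw_response.headers)
    embedding_response = raw_response.parse()  # CreateEmbeddingResponse
    print(len(embedding_response.data[0].embedding))  # vector dimensionality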
@@ -1255,15 +1322,19 @@ class OpenAIChatCompletion(BaseLLM):
             )

             ## COMPLETION CALL
-            response = openai_client.embeddings.create(**data, timeout=timeout)  # type: ignore
+            headers: Optional[Dict] = None
+            headers, response = self.make_sync_openai_embedding_request(
+                openai_client=openai_client, data=data, timeout=timeout
+            )  # type: ignore
             ## LOGGING
+            logging_obj.model_call_details["response_headers"] = headers
             logging_obj.post_call(
                 input=input,
                 api_key=api_key,
                 additional_args={"complete_input_dict": data},
                 original_response=response,
             )

             return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
         except OpenAIError as e:
             exception_mapping_worked = True
@@ -1427,6 +1498,33 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             raise e

+    def make_sync_openai_audio_transcriptions_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call openai_client.audio.transcriptions.with_raw_response.create when litellm.return_response_headers is True
+        - call openai_client.audio.transcriptions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    openai_client.audio.transcriptions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
     def audio_transcriptions(
         self,
         model: str,
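Audio transcription follows the same two-branch pattern. A sketch assuming a local audio file at a placeholder path:

    with open("sample.wav", "rb") as audio_file:  # placeholder path
        raw_response = client.audio.transcriptions.with_raw_response.create(
            model="whisper-1",
            file=audio_file,
        )
    headers = dict(raw_response.headers)
    transcription = raw_response.parse()  # Transcription object
    print(transcription.text)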
@@ -1462,8 +1560,10 @@ class OpenAIChatCompletion(BaseLLM):
                 timeout=timeout,
                 max_retries=max_retries,
             )
-            response = openai_client.audio.transcriptions.create(
-                **data, timeout=timeout  # type: ignore
-            )
+            headers, response = self.make_sync_openai_audio_transcriptions_request(
+                openai_client=openai_client,
+                data=data,
+                timeout=timeout,
+            )

             if isinstance(response, BaseModel):
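Taken together, the three sync paths (chat completions, embeddings, audio transcriptions) now capture provider headers behind a single flag. From the caller's side, enabling the feature is one line; note the commit itself only threads the headers into logging metadata, so surfacing them relies on the callback plumbing assumed earlier:

    import litellm

    litellm.return_response_headers = True
    # litellm.success_callback = [log_rate_limits]  # from the sketch above

    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "ping"}],
    )
    print(response.choices[0].message.content)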