openai return response headers

Ishaan Jaff 2024-07-20 14:07:41 -07:00
parent 725cd91064
commit 64dbe07593

@@ -784,6 +784,34 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             raise e
 
+    def make_sync_openai_chat_completion_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call chat.completions.with_raw_response.create when litellm.return_response_headers is True
+        - call chat.completions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = openai_client.chat.completions.with_raw_response.create(
+                    **data, timeout=timeout
+                )
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.chat.completions.create(
+                    **data, timeout=timeout
+                )
+                return None, response
+        except Exception as e:
+            raise e
+
     def completion(
         self,
         model_response: ModelResponse,
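
For context, the new helper leans on the OpenAI Python SDK's with_raw_response wrapper, which exposes the raw HTTP response (headers included) while still allowing the usual typed object to be parsed out. A minimal standalone sketch of that pattern outside LiteLLM, assuming a valid OPENAI_API_KEY in the environment (model name and header name are illustrative):

    # Sketch of the SDK pattern wrapped by the helper above; not part of this commit.
    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    raw = client.chat.completions.with_raw_response.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )

    headers = dict(raw.headers)  # HTTP response headers, e.g. rate-limit metadata
    completion = raw.parse()     # the regular ChatCompletion object

    print(headers.get("x-ratelimit-remaining-requests"))
    print(completion.choices[0].message.content)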
@@ -913,7 +941,15 @@ class OpenAIChatCompletion(BaseLLM):
                 },
             )
-            response = openai_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
+            headers, response = (
+                self.make_sync_openai_chat_completion_request(
+                    openai_client=openai_client,
+                    data=data,
+                    timeout=timeout,
+                )
+            )
+            logging_obj.model_call_details["response_headers"] = headers
             stringified_response = response.model_dump()
             logging_obj.post_call(
                 input=messages,
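
With the non-streaming path now storing the headers on logging_obj.model_call_details["response_headers"], downstream logging code can pick up rate-limit metadata from there. A hypothetical, self-contained sketch of such a consumer (the dict literal stands in for the real model_call_details, and the header names are ones OpenAI commonly returns but are not guaranteed):

    # Hypothetical consumer of the recorded headers; stand-in data, not from this commit.
    model_call_details = {
        "response_headers": {
            "x-ratelimit-remaining-requests": "99",
            "x-ratelimit-remaining-tokens": "14500",
        }
    }

    headers = model_call_details.get("response_headers") or {}
    print("requests left:", headers.get("x-ratelimit-remaining-requests"))
    print("tokens left:", headers.get("x-ratelimit-remaining-tokens"))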
@@ -1059,7 +1095,13 @@ class OpenAIChatCompletion(BaseLLM):
                     "complete_input_dict": data,
                 },
             )
-            response = openai_client.chat.completions.create(**data, timeout=timeout)
+            headers, response = self.make_sync_openai_chat_completion_request(
+                openai_client=openai_client,
+                data=data,
+                timeout=timeout,
+            )
+            logging_obj.model_call_details["response_headers"] = headers
             streamwrapper = CustomStreamWrapper(
                 completion_stream=response,
                 model=model,
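
The streaming branch reuses the same helper, so when stream=True is in data and the flag is on, the object handed to CustomStreamWrapper comes out of raw_response.parse(). In the OpenAI SDK that parse call should still yield the usual chunk stream; a rough standalone sketch of that behaviour (model and header names are illustrative, and the streaming-through-raw-response behaviour is an assumption to verify against the SDK version in use):

    # Rough sketch of with_raw_response combined with streaming; illustrative only.
    from openai import OpenAI

    client = OpenAI()
    raw = client.chat.completions.with_raw_response.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        stream=True,
    )
    print(dict(raw.headers).get("x-request-id"))

    for chunk in raw.parse():  # expected to behave like the usual chunk stream
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")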
@@ -1159,6 +1201,31 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             raise e
 
+    def make_sync_openai_embedding_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call embeddings.with_raw_response.create when litellm.return_response_headers is True
+        - call embeddings.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = openai_client.embeddings.with_raw_response.create(
+                    **data, timeout=timeout
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.embeddings.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
     async def aembedding(
         self,
         input: list,
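
The embedding helper follows the same two-path shape as the chat completion one. For reference, the underlying SDK call it makes when the flag is on looks roughly like this (model name is illustrative; a valid OPENAI_API_KEY is assumed):

    # Sketch of the raw-response embeddings call wrapped by the helper above; not part of this commit.
    from openai import OpenAI

    client = OpenAI()
    raw = client.embeddings.with_raw_response.create(
        model="text-embedding-3-small",
        input=["hello world"],
    )
    headers = dict(raw.headers)
    embedding_response = raw.parse()  # the regular CreateEmbeddingResponse
    print(len(embedding_response.data[0].embedding), headers.get("x-request-id"))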
@@ -1255,15 +1322,19 @@ class OpenAIChatCompletion(BaseLLM):
             )
             ## COMPLETION CALL
-            response = openai_client.embeddings.create(**data, timeout=timeout)  # type: ignore
+            headers: Optional[Dict] = None
+            headers, response = self.make_sync_openai_embedding_request(
+                openai_client=openai_client, data=data, timeout=timeout
+            )  # type: ignore
             ## LOGGING
+            logging_obj.model_call_details["response_headers"] = headers
             logging_obj.post_call(
                 input=input,
                 api_key=api_key,
                 additional_args={"complete_input_dict": data},
                 original_response=response,
             )
             return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
         except OpenAIError as e:
             exception_mapping_worked = True
@@ -1427,6 +1498,33 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             raise e
 
+    def make_sync_openai_audio_transcriptions_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call openai_client.audio.transcriptions.with_raw_response.create when litellm.return_response_headers is True
+        - call openai_client.audio.transcriptions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    openai_client.audio.transcriptions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
     def audio_transcriptions(
         self,
         model: str,
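
The transcription helper rounds out the same pattern for Whisper calls. A standalone sketch of the raw-response variant it takes when the flag is on (file path and model are placeholders; a valid OPENAI_API_KEY is assumed):

    # Sketch of the raw-response transcription call wrapped above; placeholders throughout.
    from openai import OpenAI

    client = OpenAI()
    with open("sample.mp3", "rb") as audio_file:
        raw = client.audio.transcriptions.with_raw_response.create(
            model="whisper-1",
            file=audio_file,
        )
    print(dict(raw.headers).get("x-request-id"))
    print(raw.parse().text)  # the regular Transcription object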
@@ -1462,8 +1560,10 @@ class OpenAIChatCompletion(BaseLLM):
                 timeout=timeout,
                 max_retries=max_retries,
             )
-            response = openai_client.audio.transcriptions.create(
-                **data, timeout=timeout  # type: ignore
-            )
+            headers, response = self.make_sync_openai_audio_transcriptions_request(
+                openai_client=openai_client,
+                data=data,
+                timeout=timeout,
+            )
 
             if isinstance(response, BaseModel):
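
Taken together, the flag is flipped at the litellm module level before a normal call, and the headers are then recorded on the logging object as the hunks above show. A hypothetical end-to-end sketch using a custom success callback; whether response_headers reaches the callback kwargs in exactly this form is an assumption, not something this commit guarantees:

    # Hypothetical end-to-end use; the callback kwargs shape is an assumption.
    import litellm

    def log_headers(kwargs, completion_response, start_time, end_time):
        headers = kwargs.get("response_headers") or {}
        print("requests left:", headers.get("x-ratelimit-remaining-requests"))

    litellm.return_response_headers = True
    litellm.success_callback = [log_headers]

    resp = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
    )
    print(resp.choices[0].message.content)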