forked from phoenix/litellm-mirror
openai return response headers
parent 725cd91064
commit 64dbe07593

1 changed file with 106 additions and 6 deletions
@@ -784,6 +784,34 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             raise e

+    def make_sync_openai_chat_completion_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call chat.completions.with_raw_response.create when litellm.return_response_headers is True
+        - call chat.completions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = openai_client.chat.completions.with_raw_response.create(
+                    **data, timeout=timeout
+                )
+
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.chat.completions.create(
+                    **data, timeout=timeout
+                )
+                return None, response
+        except Exception as e:
+            raise e
+
     def completion(
         self,
         model_response: ModelResponse,
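For reference, the raw-response path the new helper takes is the openai v1 SDK's with_raw_response wrapper, which exposes the HTTP headers alongside the parsed body. A minimal standalone sketch, assuming a configured OPENAI_API_KEY; the model name and message are placeholders, not values from this commit:

    import litellm
    from openai import OpenAI

    litellm.return_response_headers = True  # the flag this commit checks

    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "ping"}],
    }

    # with_raw_response keeps the HTTP layer instead of discarding it
    raw_response = client.chat.completions.with_raw_response.create(**data)
    headers = dict(raw_response.headers)  # e.g. OpenAI's x-ratelimit-* headers
    response = raw_response.parse()       # the usual ChatCompletion object

    print(headers.get("x-ratelimit-remaining-requests"))
    print(response.choices[0].message.content)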
@@ -913,7 +941,15 @@ class OpenAIChatCompletion(BaseLLM):
                 },
             )

-            response = openai_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
+            headers, response = (
+                self.make_sync_openai_chat_completion_request(
+                    openai_client=openai_client,
+                    data=data,
+                    timeout=timeout,
+                )
+            )
+
+            logging_obj.model_call_details["response_headers"] = headers
             stringified_response = response.model_dump()
             logging_obj.post_call(
                 input=messages,
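The captured headers are stored on logging_obj.model_call_details under the "response_headers" key. A hypothetical consumer sketch, assuming model_call_details is what litellm hands to custom success callbacks as kwargs (the callback plumbing is not part of this diff):

    # Hypothetical: reads the key this hunk sets; the kwargs plumbing is assumed.
    def log_rate_limits(kwargs, completion_response, start_time, end_time):
        headers = kwargs.get("response_headers") or {}
        print("remaining requests:", headers.get("x-ratelimit-remaining-requests"))
        print("remaining tokens:", headers.get("x-ratelimit-remaining-tokens"))

    # registration, per litellm's custom-callback convention:
    # litellm.success_callback = [log_rate_limits]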
@@ -1059,7 +1095,13 @@ class OpenAIChatCompletion(BaseLLM):
                     "complete_input_dict": data,
                 },
             )
-            response = openai_client.chat.completions.create(**data, timeout=timeout)
+            headers, response = self.make_sync_openai_chat_completion_request(
+                openai_client=openai_client,
+                data=data,
+                timeout=timeout,
+            )
+
+            logging_obj.model_call_details["response_headers"] = headers
             streamwrapper = CustomStreamWrapper(
                 completion_stream=response,
                 model=model,
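This second call site is the streaming path: with stream=True in data, raw_response.parse() yields a chunk stream rather than a ChatCompletion, so the headers are available before the first chunk is consumed and CustomStreamWrapper wraps the parsed stream as before. A sketch under the same placeholder assumptions as above:

    raw_response = client.chat.completions.with_raw_response.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "ping"}],
        stream=True,
    )
    headers = dict(raw_response.headers)  # captured before any chunk arrives
    stream = raw_response.parse()         # Stream[ChatCompletionChunk]
    for chunk in stream:
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")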
@@ -1159,6 +1201,31 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             raise e

+    def make_sync_openai_embedding_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call embeddings.with_raw_response.create when litellm.return_response_headers is True
+        - call embeddings.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = openai_client.embeddings.with_raw_response.create(
+                    **data, timeout=timeout
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.embeddings.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
     async def aembedding(
         self,
         input: list,
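The embeddings helper mirrors the chat one. A standalone sketch with a placeholder model and input, assuming a configured key and the client from the first sketch:

    raw_response = client.embeddings.with_raw_response.create(
        model="text-embedding-ada-002",
        input=["hello world"],
    )
    headers = dict(raw_response.headers)
    embedding_response = raw_response.parse()  # CreateEmbeddingResponse
    print(len(embedding_response.data[0].embedding))  # vector dimensionality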
@@ -1255,15 +1322,19 @@ class OpenAIChatCompletion(BaseLLM):
             )

             ## COMPLETION CALL
-            response = openai_client.embeddings.create(**data, timeout=timeout)  # type: ignore
+            headers: Optional[Dict] = None
+            headers, response = self.make_sync_openai_embedding_request(
+                openai_client=openai_client, data=data, timeout=timeout
+            )  # type: ignore
             ## LOGGING
+            logging_obj.model_call_details["response_headers"] = headers
             logging_obj.post_call(
                 input=input,
                 api_key=api_key,
                 additional_args={"complete_input_dict": data},
                 original_response=response,
             )

             return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
         except OpenAIError as e:
             exception_mapping_worked = True
@@ -1427,6 +1498,33 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             raise e

+    def make_sync_openai_audio_transcriptions_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call openai_client.audio.transcriptions.with_raw_response.create when litellm.return_response_headers is True
+        - call openai_client.audio.transcriptions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    openai_client.audio.transcriptions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
     def audio_transcriptions(
         self,
         model: str,
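Audio transcription follows the same two-branch pattern. A sketch assuming a local audio file at a placeholder path:

    with open("sample.wav", "rb") as audio_file:  # placeholder path
        raw_response = client.audio.transcriptions.with_raw_response.create(
            model="whisper-1",
            file=audio_file,
        )
    headers = dict(raw_response.headers)
    transcription = raw_response.parse()  # Transcription object
    print(transcription.text)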
@@ -1462,8 +1560,10 @@ class OpenAIChatCompletion(BaseLLM):
                 timeout=timeout,
                 max_retries=max_retries,
             )
-            response = openai_client.audio.transcriptions.create(
-                **data, timeout=timeout  # type: ignore
-            )
+            headers, response = self.make_sync_openai_audio_transcriptions_request(
+                openai_client=openai_client,
+                data=data,
+                timeout=timeout,
+            )

             if isinstance(response, BaseModel):
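Taken together, the three sync paths (chat completions, embeddings, audio transcriptions) now capture provider headers behind a single flag. From the caller's side, enabling the feature is one line; note the commit itself only threads the headers into logging metadata, so surfacing them relies on the callback plumbing assumed earlier:

    import litellm

    litellm.return_response_headers = True
    # litellm.success_callback = [log_rate_limits]  # from the sketch above

    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "ping"}],
    )
    print(response.choices[0].message.content)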