feat - return response headers for async openai requests

Ishaan Jaff 2024-07-01 17:01:42 -07:00
parent 4c95782f74
commit 4b7feb3261
2 changed files with 32 additions and 4 deletions

@@ -125,6 +125,9 @@ llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
 ##################
 ### PREVIEW FEATURES ###
 enable_preview_features: bool = False
+return_response_headers: bool = (
+    False  # get response headers from LLM API providers, e.g. x-ratelimit-remaining-requests
+)
 ##################
 logging: bool = True
 caching: bool = (
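
The new flag is read at call time from the top-level litellm module, so enabling it is a one-line opt-in. A minimal sketch of how a caller might toggle it (the flag itself is what this hunk adds; assumes litellm is installed):

import litellm

# Preview feature: when True, async OpenAI calls go through with_raw_response
# so provider response headers can be captured alongside the completion.
litellm.return_response_headers = True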

@@ -652,6 +652,31 @@ class OpenAIChatCompletion(BaseLLM):
         else:
             return client
 
+    async def make_openai_chat_completion_request(
+        self,
+        openai_aclient: AsyncOpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    await openai_aclient.chat.completions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = await openai_aclient.chat.completions.create(
+                    **data, timeout=timeout
+                )
+                return None, response
+        except Exception as e:
+            raise e
+
     def completion(
         self,
         model_response: ModelResponse,
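
The helper above leans on the OpenAI Python SDK's with_raw_response wrapper, which keeps the HTTP headers accessible and defers parsing until .parse() is called. A standalone sketch of that pattern outside litellm (model name and prompt are illustrative; assumes OPENAI_API_KEY is set):

import asyncio
from openai import AsyncOpenAI

async def main():
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

    # with_raw_response returns the raw HTTP response: headers stay readable,
    # and .parse() yields the same ChatCompletion a plain create() would.
    raw = await client.chat.completions.with_raw_response.create(
        model="gpt-3.5-turbo",  # illustrative
        messages=[{"role": "user", "content": "ping"}],
    )
    headers = dict(raw.headers)
    completion = raw.parse()

    print(headers.get("x-ratelimit-remaining-requests"))
    print(completion.choices[0].message.content)

asyncio.run(main())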
@@ -869,8 +894,8 @@ class OpenAIChatCompletion(BaseLLM):
                 },
             )
-            response = await openai_aclient.chat.completions.create(
-                **data, timeout=timeout
-            )
+            headers, response = await self.make_openai_chat_completion_request(
+                openai_aclient=openai_aclient, data=data, timeout=timeout
+            )
             stringified_response = response.model_dump()
             logging_obj.post_call(
@@ -965,8 +990,8 @@ class OpenAIChatCompletion(BaseLLM):
                 },
             )
-            response = await openai_aclient.chat.completions.create(
-                **data, timeout=timeout
-            )
+            headers, response = await self.make_openai_chat_completion_request(
+                openai_aclient=openai_aclient, data=data, timeout=timeout
+            )
             streamwrapper = CustomStreamWrapper(
                 completion_stream=response,
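
Both call sites now unpack a two-tuple, with headers set to None whenever return_response_headers is off. A hypothetical consumer of the captured headers (function name and printed keys are illustrative, not part of this commit):

from typing import Optional

def log_rate_limits(headers: Optional[dict]) -> None:
    # headers is None unless litellm.return_response_headers is True.
    if headers is None:
        return
    # OpenAI reports rate-limit state in x-ratelimit-* response headers.
    for key in ("x-ratelimit-remaining-requests", "x-ratelimit-remaining-tokens"):
        if key in headers:
            print(f"{key}: {headers[key]}")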