forked from phoenix/litellm-mirror
feat - return response headers for async openai requests
This commit is contained in:
parent
4c95782f74
commit
4b7feb3261
2 changed files with 32 additions and 4 deletions
|
@ -125,6 +125,9 @@ llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
|
||||||
##################
|
##################
|
||||||
### PREVIEW FEATURES ###
|
### PREVIEW FEATURES ###
|
||||||
enable_preview_features: bool = False
|
enable_preview_features: bool = False
|
||||||
|
return_response_headers: bool = (
|
||||||
|
False # get response headers from LLM API providers, e.g. x-remaining-requests
|
||||||
|
)
|
||||||
##################
|
##################
|
||||||
logging: bool = True
|
logging: bool = True
|
||||||
caching: bool = (
|
caching: bool = (
|
||||||
|
|
|
@ -652,6 +652,31 @@ class OpenAIChatCompletion(BaseLLM):
|
||||||
else:
|
else:
|
||||||
return client
|
return client
|
||||||
|
|
||||||
|
async def make_openai_chat_completion_request(
    self,
    openai_aclient: AsyncOpenAI,
    data: dict,
    timeout: Union[float, httpx.Timeout],
):
    """
    Send an async chat-completion request, optionally capturing response headers.

    Args:
        openai_aclient: Async OpenAI client used to issue the request.
        data: Keyword arguments forwarded verbatim to the ``create`` call.
        timeout: Request timeout forwarded to the ``create`` call.

    Returns:
        A ``(headers, response)`` tuple. When ``litellm.return_response_headers``
        is ``True``, ``headers`` is a ``dict`` built from the raw response's
        headers and ``response`` is the result of ``raw_response.parse()``.
        Otherwise ``headers`` is ``None`` and ``response`` is whatever
        ``chat.completions.create`` returned.

    Raises:
        Any exception raised by the underlying client call propagates unchanged.
    """
    # Strict identity check on purpose: only the literal ``True`` opts in to
    # the raw-response path.
    if litellm.return_response_headers is True:
        raw_response = await openai_aclient.chat.completions.with_raw_response.create(
            **data, timeout=timeout
        )
        headers = dict(raw_response.headers)
        response = raw_response.parse()
        return headers, response

    # Default path: no header capture, so the headers slot is None.
    response = await openai_aclient.chat.completions.create(**data, timeout=timeout)
    return None, response
|
||||||
|
|
||||||
def completion(
|
def completion(
|
||||||
self,
|
self,
|
||||||
model_response: ModelResponse,
|
model_response: ModelResponse,
|
||||||
|
@ -869,8 +894,8 @@ class OpenAIChatCompletion(BaseLLM):
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
response = await openai_aclient.chat.completions.create(
|
headers, response = await self.make_openai_chat_completion_request(
|
||||||
**data, timeout=timeout
|
openai_aclient=openai_aclient, data=data, timeout=timeout
|
||||||
)
|
)
|
||||||
stringified_response = response.model_dump()
|
stringified_response = response.model_dump()
|
||||||
logging_obj.post_call(
|
logging_obj.post_call(
|
||||||
|
@ -965,8 +990,8 @@ class OpenAIChatCompletion(BaseLLM):
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
response = await openai_aclient.chat.completions.create(
|
headers, response = await self.make_openai_chat_completion_request(
|
||||||
**data, timeout=timeout
|
openai_aclient=openai_aclient, data=data, timeout=timeout
|
||||||
)
|
)
|
||||||
streamwrapper = CustomStreamWrapper(
|
streamwrapper = CustomStreamWrapper(
|
||||||
completion_stream=response,
|
completion_stream=response,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue