refactor(openai/azure.py): move to returning openai/azure response headers by default

Allows token tracking to work more reliably across multiple azure/openai deployments
This commit is contained in:
Krrish Dholakia 2024-08-02 09:42:08 -07:00
parent cd073d5ad3
commit fe2aa706e8
2 changed files with 13 additions and 27 deletions

View file

@@ -474,21 +474,13 @@ class AzureChatCompletion(BaseLLM):
- call chat.completions.create by default - call chat.completions.create by default
""" """
try: try:
if litellm.return_response_headers is True: raw_response = await azure_client.chat.completions.with_raw_response.create(
raw_response = (
await azure_client.chat.completions.with_raw_response.create(
**data, timeout=timeout **data, timeout=timeout
) )
)
headers = dict(raw_response.headers) headers = dict(raw_response.headers)
response = raw_response.parse() response = raw_response.parse()
return headers, response return headers, response
else:
response = await azure_client.chat.completions.create(
**data, timeout=timeout
)
return None, response
except Exception as e: except Exception as e:
raise e raise e

View file

@@ -768,7 +768,6 @@ class OpenAIChatCompletion(BaseLLM):
- call chat.completions.create by default - call chat.completions.create by default
""" """
try: try:
if litellm.return_response_headers is True:
raw_response = ( raw_response = (
await openai_aclient.chat.completions.with_raw_response.create( await openai_aclient.chat.completions.with_raw_response.create(
**data, timeout=timeout **data, timeout=timeout
@@ -778,11 +777,6 @@ class OpenAIChatCompletion(BaseLLM):
headers = dict(raw_response.headers) headers = dict(raw_response.headers)
response = raw_response.parse() response = raw_response.parse()
return headers, response return headers, response
else:
response = await openai_aclient.chat.completions.create(
**data, timeout=timeout
)
return None, response
except Exception as e: except Exception as e:
raise e raise e