return response headers

Ishaan Jaff 2024-07-20 15:26:44 -07:00
parent 2e9f1e8de2
commit 5e52f50a82
2 changed files with 65 additions and 1 deletion
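In effect, when `litellm.return_response_headers` is enabled, the raw provider HTTP headers (e.g. OpenAI's rate-limit headers) are surfaced on the returned response object as `response_headers`. A minimal usage sketch, assuming the synchronous path mirrors the async tests added in this commit; exact header keys depend on the provider:

```python
import litellm

# Opt in to surfacing raw provider response headers
# (flag and attribute names as exercised by the tests in this commit).
litellm.return_response_headers = True

response = litellm.completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "hi"}],
)

# OpenAI reports rate-limit state in its HTTP response headers;
# response_headers is assumed here to behave like a mapping.
headers = response.response_headers
print(headers.get("x-ratelimit-remaining-tokens"))
```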


@@ -1059,6 +1059,7 @@ class OpenAIChatCompletion(BaseLLM):
                 response_object=stringified_response,
                 model_response_object=model_response,
                 hidden_params={"headers": headers},
+                response_headers=headers,
             )
         except Exception as e:
             raise e
@@ -1159,6 +1160,7 @@ class OpenAIChatCompletion(BaseLLM):
                 custom_llm_provider="openai",
                 logging_obj=logging_obj,
                 stream_options=data.get("stream_options", None),
+                response_headers=headers,
             )
             return streamwrapper
         except (
@@ -1263,7 +1265,12 @@ class OpenAIChatCompletion(BaseLLM):
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(
+                response_object=stringified_response,
+                model_response_object=model_response,
+                response_type="embedding",
+                response_headers=headers,
+            )  # type: ignore
         except Exception as e:
             ## LOGGING
             logging_obj.post_call(
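For streaming, the headers are attached to the stream wrapper itself, so they can be read before any chunks are consumed; the embedding path follows the same pattern via `embedding_response.response_headers`. A sketch of the async streaming usage exercised by the new test below:

```python
import asyncio
import litellm

async def main():
    litellm.return_response_headers = True

    # Headers live on the returned stream wrapper, so they are
    # readable before the first chunk arrives.
    stream = await litellm.acompletion(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "hi"}],
        stream=True,
    )
    print(stream.response_headers.get("x-ratelimit-remaining-tokens"))

    async for chunk in stream:
        ...  # consume the stream as usual

asyncio.run(main())
```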


@@ -1361,6 +1361,63 @@ def test_completion_openai_response_headers():
     assert embedding_response_headers is not None
     assert "x-ratelimit-remaining-tokens" in embedding_response_headers
     litellm.return_response_headers = False
+
+
+@pytest.mark.asyncio()
+async def test_async_completion_openai_response_headers():
+    """
+    Tests if LiteLLM returns response headers for async calls.
+    """
+    litellm.return_response_headers = True
+
+    # /chat/completion
+    messages = [
+        {
+            "role": "user",
+            "content": "hi",
+        }
+    ]
+
+    response = await litellm.acompletion(
+        model="gpt-4o-mini",
+        messages=messages,
+    )
+    print(f"response: {response}")
+
+    print("response_headers=", response.response_headers)
+    assert response.response_headers is not None
+    assert "x-ratelimit-remaining-tokens" in response.response_headers
+
+    # /chat/completion with streaming
+    streaming_response = await litellm.acompletion(
+        model="gpt-4o-mini",
+        messages=messages,
+        stream=True,
+    )
+    response_headers = streaming_response.response_headers
+    print("streaming response_headers=", response_headers)
+    assert response_headers is not None
+    assert "x-ratelimit-remaining-tokens" in response_headers
+
+    async for chunk in streaming_response:
+        print("chunk=", chunk)
+
+    # embedding
+    embedding_response = await litellm.aembedding(
+        model="text-embedding-ada-002",
+        input="hello",
+    )
+
+    embedding_response_headers = embedding_response.response_headers
+    print("embedding_response_headers=", embedding_response_headers)
+    assert embedding_response_headers is not None
+    assert "x-ratelimit-remaining-tokens" in embedding_response_headers
+
+    litellm.return_response_headers = False
+
+
 @pytest.mark.parametrize("model", ["gpt-3.5-turbo", "gpt-4", "gpt-4o"])
 def test_completion_openai_params(model):