diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 34eebb712..779203259 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1310,6 +1310,58 @@ def test_completion_azure_gpt4_vision():
 # test_completion_azure_gpt4_vision()
 
 
+def test_completion_openai_response_headers():
+    """
+    Tests if LiteLLM returns response headers when litellm.return_response_headers is True.
+    """
+    litellm.return_response_headers = True
+
+    # /chat/completions
+    messages = [
+        {
+            "role": "user",
+            "content": "hi",
+        }
+    ]
+
+    response = completion(
+        model="gpt-4o-mini",
+        messages=messages,
+    )
+
+    print(f"response: {response}")
+
+    print("response_headers=", response.response_headers)
+    assert response.response_headers is not None
+    assert "x-ratelimit-remaining-tokens" in response.response_headers
+
+    # /chat/completions with streaming
+
+    streaming_response = litellm.completion(
+        model="gpt-4o-mini",
+        messages=messages,
+        stream=True,
+    )
+    response_headers = streaming_response.response_headers
+    print("streaming response_headers=", response_headers)
+    assert response_headers is not None
+    assert "x-ratelimit-remaining-tokens" in response_headers
+
+    for chunk in streaming_response:
+        print("chunk=", chunk)
+
+    # embedding
+    embedding_response = litellm.embedding(
+        model="text-embedding-ada-002",
+        input="hello",
+    )
+
+    embedding_response_headers = embedding_response.response_headers
+    print("embedding_response_headers=", embedding_response_headers)
+    assert embedding_response_headers is not None
+    assert "x-ratelimit-remaining-tokens" in embedding_response_headers
+
+
 @pytest.mark.parametrize("model", ["gpt-3.5-turbo", "gpt-4", "gpt-4o"])
 def test_completion_openai_params(model):
     litellm.drop_params = True