From 274bf3e48d5536040d1d9142e9cf8292c062313a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 21 Oct 2024 20:41:35 +0530
Subject: [PATCH] (fix) get_response_headers for Azure OpenAI (#6344)

* fix get_response_headers

* unit testing for get headers

* unit testing for anthropic / azure openai headers

* increase test coverage for test_completion_response_ratelimit_headers

* fix test rate limit headers
---
 .../llm_response_utils/get_headers.py        | 55 +++++++++++
 litellm/utils.py                             | 39 ++------
 .../test_anthropic_completion.py             | 70 ++++++++++++++
 tests/llm_translation/test_azure_openai.py   | 96 +++++++++++++++++++
 .../test_get_headers.py                      | 79 +++++++++++++++
 tests/local_testing/test_completion.py       |  7 ++
 6 files changed, 316 insertions(+), 30 deletions(-)
 create mode 100644 litellm/litellm_core_utils/llm_response_utils/get_headers.py
 create mode 100644 tests/llm_translation/test_azure_openai.py
 create mode 100644 tests/llm_translation/test_llm_response_utils/test_get_headers.py

diff --git a/litellm/litellm_core_utils/llm_response_utils/get_headers.py b/litellm/litellm_core_utils/llm_response_utils/get_headers.py
new file mode 100644
index 000000000..58a5f1715
--- /dev/null
+++ b/litellm/litellm_core_utils/llm_response_utils/get_headers.py
@@ -0,0 +1,55 @@
+from typing import Optional
+
+
+def get_response_headers(_response_headers: Optional[dict] = None) -> dict:
+    """
+
+    Sets the appropriate OpenAI headers for the response and forwards all headers as llm_provider-{header}
+
+    Note: the _response_headers passed here should be OpenAI-compatible headers
+
+    Args:
+        _response_headers (Optional[dict], optional): response headers from the LLM API call. Defaults to None.
+
+    Returns:
+        dict: OpenAI rate-limit headers merged with all headers forwarded as llm_provider-{header}
+
+    """
+    if _response_headers is not None:
+        openai_headers = {}
+        if "x-ratelimit-limit-requests" in _response_headers:
+            openai_headers["x-ratelimit-limit-requests"] = _response_headers[
+                "x-ratelimit-limit-requests"
+            ]
+        if "x-ratelimit-remaining-requests" in _response_headers:
+            openai_headers["x-ratelimit-remaining-requests"] = _response_headers[
+                "x-ratelimit-remaining-requests"
+            ]
+        if "x-ratelimit-limit-tokens" in _response_headers:
+            openai_headers["x-ratelimit-limit-tokens"] = _response_headers[
+                "x-ratelimit-limit-tokens"
+            ]
+        if "x-ratelimit-remaining-tokens" in _response_headers:
+            openai_headers["x-ratelimit-remaining-tokens"] = _response_headers[
+                "x-ratelimit-remaining-tokens"
+            ]
+        llm_provider_headers = _get_llm_provider_headers(_response_headers)
+        return {**llm_provider_headers, **openai_headers}
+    return {}
+
+
+def _get_llm_provider_headers(response_headers: dict) -> dict:
+    """
+    Forwards all headers as llm_provider-{header}
+
+    Headers already prefixed with llm_provider are passed through unchanged
+
+    """
+    llm_provider_headers = {}
+    for k, v in response_headers.items():
+        if "llm_provider" not in k:
+            _key = "{}-{}".format("llm_provider", k)
+            llm_provider_headers[_key] = v
+        else:
+            llm_provider_headers[k] = v
+    return llm_provider_headers
diff --git a/litellm/utils.py b/litellm/utils.py
index 7f9ba2822..c4525ad7c 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -70,6 +70,9 @@ from litellm.litellm_core_utils.get_llm_provider_logic import (
     get_llm_provider,
 )
 from litellm.litellm_core_utils.llm_request_utils import _ensure_extra_body_is_safe
+from litellm.litellm_core_utils.llm_response_utils.get_headers import (
+    get_response_headers,
+)
 from litellm.litellm_core_utils.redact_messages import (
     LiteLLMLoggingObject,
     redact_message_input_output_from_logging,
@@ -5704,36 +5707,12 @@ def convert_to_model_response_object(  # noqa: PLR0915
 ):
     received_args = locals()
 
-    if _response_headers is not None:
-        openai_headers = {}
-        if "x-ratelimit-limit-requests" in _response_headers:
-            openai_headers["x-ratelimit-limit-requests"] = _response_headers[
-                "x-ratelimit-limit-requests"
-            ]
-        if "x-ratelimit-remaining-requests" in _response_headers:
-            openai_headers["x-ratelimit-remaining-requests"] = _response_headers[
-                "x-ratelimit-remaining-requests"
-            ]
-        if "x-ratelimit-limit-tokens" in _response_headers:
-            openai_headers["x-ratelimit-limit-tokens"] = _response_headers[
-                "x-ratelimit-limit-tokens"
-            ]
-        if "x-ratelimit-remaining-tokens" in _response_headers:
-            openai_headers["x-ratelimit-remaining-tokens"] = _response_headers[
-                "x-ratelimit-remaining-tokens"
-            ]
-        llm_response_headers = {
-            "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items()
-        }
-        if hidden_params is not None:
-            hidden_params["additional_headers"] = {
-                **llm_response_headers,
-                **openai_headers,
-            }
-        else:
-            hidden_params = {
-                "additional_headers": {**llm_response_headers, **openai_headers}
-            }
+    additional_headers = get_response_headers(_response_headers)
+
+    if hidden_params is None:
+        hidden_params = {}
+    hidden_params["additional_headers"] = additional_headers
+
     ### CHECK IF ERROR IN RESPONSE ### - openrouter returns these in the dictionary
     if (
         response_object is not None
diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py
index ca402903b..9ce951b6c 100644
--- a/tests/llm_translation/test_anthropic_completion.py
+++ b/tests/llm_translation/test_anthropic_completion.py
@@ -34,6 +34,9 @@ from litellm import (
 from litellm.adapters.anthropic_adapter import anthropic_adapter
 from litellm.types.llms.anthropic import AnthropicResponse
 
+from litellm.llms.anthropic.common_utils import process_anthropic_headers
+from httpx import Headers
+
 
 def test_anthropic_completion_messages_translation():
     messages = [{"role": "user", "content": "Hey, how's it going?"}]
@@ -457,3 +460,70 @@ def test_anthropic_tool_calling_translation():
 
     assert len(translated_params["messages"]) > 0
     assert translated_params["messages"][0]["role"] == "user"
+
+
+def test_process_anthropic_headers_empty():
+    result = process_anthropic_headers({})
+    assert result == {}, "Expected empty dictionary for no input"
+
+
+def test_process_anthropic_headers_with_all_headers():
+    input_headers = Headers(
+        {
+            "anthropic-ratelimit-requests-limit": "100",
+            "anthropic-ratelimit-requests-remaining": "90",
+            "anthropic-ratelimit-tokens-limit": "10000",
+            "anthropic-ratelimit-tokens-remaining": "9000",
+            "other-header": "value",
+        }
+    )
+
+    expected_output = {
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-requests": "90",
+        "x-ratelimit-limit-tokens": "10000",
+        "x-ratelimit-remaining-tokens": "9000",
+        "llm_provider-anthropic-ratelimit-requests-limit": "100",
+        "llm_provider-anthropic-ratelimit-requests-remaining": "90",
+        "llm_provider-anthropic-ratelimit-tokens-limit": "10000",
+        "llm_provider-anthropic-ratelimit-tokens-remaining": "9000",
+        "llm_provider-other-header": "value",
+    }
+
+    result = process_anthropic_headers(input_headers)
+    assert result == expected_output, "Unexpected output for all Anthropic headers"
+
+
+def test_process_anthropic_headers_with_partial_headers():
+    input_headers = Headers(
+        {
+            "anthropic-ratelimit-requests-limit": "100",
+            "anthropic-ratelimit-tokens-remaining": "9000",
+            "other-header": "value",
+        }
+    )
+
+    expected_output = {
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-tokens": "9000",
+        "llm_provider-anthropic-ratelimit-requests-limit": "100",
+        "llm_provider-anthropic-ratelimit-tokens-remaining": "9000",
+        "llm_provider-other-header": "value",
+    }
+
+    result = process_anthropic_headers(input_headers)
+    assert result == expected_output, "Unexpected output for partial Anthropic headers"
+
+
+def test_process_anthropic_headers_with_no_matching_headers():
+    input_headers = Headers(
+        {"unrelated-header-1": "value1", "unrelated-header-2": "value2"}
+    )
+
+    expected_output = {
+        "llm_provider-unrelated-header-1": "value1",
+        "llm_provider-unrelated-header-2": "value2",
+    }
+
+    result = process_anthropic_headers(input_headers)
+    assert result == expected_output, "Unexpected output for non-matching headers"
diff --git a/tests/llm_translation/test_azure_openai.py b/tests/llm_translation/test_azure_openai.py
new file mode 100644
index 000000000..06c6b754a
--- /dev/null
+++ b/tests/llm_translation/test_azure_openai.py
@@ -0,0 +1,96 @@
+import sys
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../../")
+)  # Adds the parent directory to the system path
+
+import pytest
+from litellm.llms.AzureOpenAI.common_utils import process_azure_headers
+from httpx import Headers
+
+
+def test_process_azure_headers_empty():
+    result = process_azure_headers({})
+    assert result == {}, "Expected empty dictionary for no input"
+
+
+def test_process_azure_headers_with_all_headers():
+    input_headers = Headers(
+        {
+            "x-ratelimit-limit-requests": "100",
+            "x-ratelimit-remaining-requests": "90",
+            "x-ratelimit-limit-tokens": "10000",
+            "x-ratelimit-remaining-tokens": "9000",
+            "other-header": "value",
+        }
+    )
+
+    expected_output = {
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-requests": "90",
+        "x-ratelimit-limit-tokens": "10000",
+        "x-ratelimit-remaining-tokens": "9000",
+        "llm_provider-x-ratelimit-limit-requests": "100",
+        "llm_provider-x-ratelimit-remaining-requests": "90",
+        "llm_provider-x-ratelimit-limit-tokens": "10000",
+        "llm_provider-x-ratelimit-remaining-tokens": "9000",
+        "llm_provider-other-header": "value",
+    }
+
+    result = process_azure_headers(input_headers)
+    assert result == expected_output, "Unexpected output for all Azure headers"
+
+
+def test_process_azure_headers_with_partial_headers():
+    input_headers = Headers(
+        {
+            "x-ratelimit-limit-requests": "100",
+            "x-ratelimit-remaining-tokens": "9000",
+            "other-header": "value",
+        }
+    )
+
+    expected_output = {
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-tokens": "9000",
+        "llm_provider-x-ratelimit-limit-requests": "100",
+        "llm_provider-x-ratelimit-remaining-tokens": "9000",
+        "llm_provider-other-header": "value",
+    }
+
+    result = process_azure_headers(input_headers)
+    assert result == expected_output, "Unexpected output for partial Azure headers"
+
+
+def test_process_azure_headers_with_no_matching_headers():
+    input_headers = Headers(
+        {"unrelated-header-1": "value1", "unrelated-header-2": "value2"}
+    )
+
+    expected_output = {
+        "llm_provider-unrelated-header-1": "value1",
+        "llm_provider-unrelated-header-2": "value2",
+    }
+
+    result = process_azure_headers(input_headers)
+    assert result == expected_output, "Unexpected output for non-matching headers"
+
+
+def test_process_azure_headers_with_dict_input():
+    input_headers = {
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-requests": "90",
+        "other-header": "value",
+    }
+
+    expected_output = {
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-requests": "90",
+        "llm_provider-x-ratelimit-limit-requests": "100",
+        "llm_provider-x-ratelimit-remaining-requests": "90",
+        "llm_provider-other-header": "value",
+    }
+
+    result = process_azure_headers(input_headers)
+    assert result == expected_output, "Unexpected output for dict input"
diff --git a/tests/llm_translation/test_llm_response_utils/test_get_headers.py b/tests/llm_translation/test_llm_response_utils/test_get_headers.py
new file mode 100644
index 000000000..f0cc7ca61
--- /dev/null
+++ b/tests/llm_translation/test_llm_response_utils/test_get_headers.py
@@ -0,0 +1,79 @@
+import json
+import os
+import sys
+from datetime import datetime
+
+sys.path.insert(
+    0, os.path.abspath("../../")
+)  # Adds the parent directory to the system path
+
+import litellm
+import pytest
+
+from litellm.litellm_core_utils.llm_response_utils.get_headers import (
+    get_response_headers,
+    _get_llm_provider_headers,
+)
+
+
+def test_get_response_headers_empty():
+    result = get_response_headers()
+    assert result == {}, "Expected empty dictionary for no input"
+
+
+def test_get_response_headers_with_openai_headers():
+    """
+    OpenAI rate-limit headers are forwarded as-is
+    All other headers are prefixed with llm_provider-
+    """
+    input_headers = {
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-requests": "50",
+        "x-ratelimit-limit-tokens": "1000",
+        "x-ratelimit-remaining-tokens": "500",
+        "other-header": "value",
+    }
+    expected_output = {
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-requests": "50",
+        "x-ratelimit-limit-tokens": "1000",
+        "x-ratelimit-remaining-tokens": "500",
+        "llm_provider-x-ratelimit-limit-requests": "100",
+        "llm_provider-x-ratelimit-remaining-requests": "50",
+        "llm_provider-x-ratelimit-limit-tokens": "1000",
+        "llm_provider-x-ratelimit-remaining-tokens": "500",
+        "llm_provider-other-header": "value",
+    }
+    result = get_response_headers(input_headers)
+    assert result == expected_output, "Unexpected output for OpenAI headers"
+
+
+def test_get_response_headers_without_openai_headers():
+    """
+    Non-OpenAI headers are prefixed with llm_provider-
+    """
+    input_headers = {"custom-header-1": "value1", "custom-header-2": "value2"}
+    expected_output = {
+        "llm_provider-custom-header-1": "value1",
+        "llm_provider-custom-header-2": "value2",
+    }
+    result = get_response_headers(input_headers)
+    assert result == expected_output, "Unexpected output for non-OpenAI headers"
+
+
+def test_get_llm_provider_headers():
+    """
+    Headers already prefixed with llm_provider- are not prefixed again
+    """
+    input_headers = {
+        "header1": "value1",
+        "header2": "value2",
+        "llm_provider-existing": "existing_value",
+    }
+    expected_output = {
+        "llm_provider-header1": "value1",
+        "llm_provider-header2": "value2",
+        "llm_provider-existing": "existing_value",
+    }
+    result = _get_llm_provider_headers(input_headers)
+    assert result == expected_output, "Unexpected output for _get_llm_provider_headers"
diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
index 4dc9cc91c..b908c7b5c 100644
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@@ -4569,6 +4569,13 @@ def test_completion_response_ratelimit_headers(model, stream):
     assert "x-ratelimit-remaining-requests" in additional_headers
     assert "x-ratelimit-remaining-tokens" in additional_headers
 
+    if model == "azure/chatgpt-v-2":
+        # Azure OpenAI header
+        assert "llm_provider-azureml-model-session" in additional_headers
+    if model == "claude-3-sonnet-20240229":
+        # Anthropic header
+        assert "llm_provider-anthropic-ratelimit-requests-reset" in additional_headers
+
 
 def _openai_hallucinated_tool_call_mock_response(
    *args, **kwargs