forked from phoenix/litellm-mirror
(fix) get_response_headers for Azure OpenAI (#6344)
* fix get_response_headers
* unit testing for get headers
* unit testing for anthropic / azure openai headers
* increase test coverage for test_completion_response_ratelimit_headers
* fix test rate limit headers
parent fb523b79e9
commit 274bf3e48d
6 changed files with 316 additions and 30 deletions
55  litellm/litellm_core_utils/llm_response_utils/get_headers.py  Normal file
@@ -0,0 +1,55 @@
from typing import Optional


def get_response_headers(_response_headers: Optional[dict] = None) -> dict:
    """
    Sets the appropriate OpenAI headers on the response and forwards all headers as llm_provider-{header}.

    Note: the _response_headers passed here should be OpenAI-compatible headers.

    Args:
        _response_headers (Optional[dict], optional): _response_headers. Defaults to None.

    Returns:
        dict: _response_headers with OpenAI headers and llm_provider-{header}
    """
    if _response_headers is not None:
        openai_headers = {}
        if "x-ratelimit-limit-requests" in _response_headers:
            openai_headers["x-ratelimit-limit-requests"] = _response_headers[
                "x-ratelimit-limit-requests"
            ]
        if "x-ratelimit-remaining-requests" in _response_headers:
            openai_headers["x-ratelimit-remaining-requests"] = _response_headers[
                "x-ratelimit-remaining-requests"
            ]
        if "x-ratelimit-limit-tokens" in _response_headers:
            openai_headers["x-ratelimit-limit-tokens"] = _response_headers[
                "x-ratelimit-limit-tokens"
            ]
        if "x-ratelimit-remaining-tokens" in _response_headers:
            openai_headers["x-ratelimit-remaining-tokens"] = _response_headers[
                "x-ratelimit-remaining-tokens"
            ]
        llm_provider_headers = _get_llm_provider_headers(_response_headers)
        return {**llm_provider_headers, **openai_headers}
    return {}


def _get_llm_provider_headers(response_headers: dict) -> dict:
    """
    Adds an llm_provider-{header} key for every header that is not already prefixed with llm_provider.

    Forwards all headers as llm_provider-{header}.
    """
    llm_provider_headers = {}
    for k, v in response_headers.items():
        if "llm_provider" not in k:
            _key = "{}-{}".format("llm_provider", k)
            llm_provider_headers[_key] = v
        else:
            llm_provider_headers[k] = v
    return llm_provider_headers
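For illustration, a quick usage sketch of the new helper (not part of the diff): OpenAI-compatible rate-limit keys are kept as-is, and every header is also forwarded under the llm_provider- prefix.

from litellm.litellm_core_utils.llm_response_utils.get_headers import (
    get_response_headers,
)

# one OpenAI-compatible key, one provider-specific key
headers = get_response_headers(
    {"x-ratelimit-limit-requests": "100", "retry-after": "1"}
)
assert headers == {
    "x-ratelimit-limit-requests": "100",
    "llm_provider-x-ratelimit-limit-requests": "100",
    "llm_provider-retry-after": "1",
}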
@@ -70,6 +70,9 @@ from litellm.litellm_core_utils.get_llm_provider_logic import (
     get_llm_provider,
 )
 from litellm.litellm_core_utils.llm_request_utils import _ensure_extra_body_is_safe
+from litellm.litellm_core_utils.llm_response_utils.get_headers import (
+    get_response_headers,
+)
 from litellm.litellm_core_utils.redact_messages import (
     LiteLLMLoggingObject,
     redact_message_input_output_from_logging,
@@ -5704,36 +5707,12 @@ def convert_to_model_response_object(  # noqa: PLR0915
 ):
     received_args = locals()
 
     if _response_headers is not None:
-        openai_headers = {}
-        if "x-ratelimit-limit-requests" in _response_headers:
-            openai_headers["x-ratelimit-limit-requests"] = _response_headers[
-                "x-ratelimit-limit-requests"
-            ]
-        if "x-ratelimit-remaining-requests" in _response_headers:
-            openai_headers["x-ratelimit-remaining-requests"] = _response_headers[
-                "x-ratelimit-remaining-requests"
-            ]
-        if "x-ratelimit-limit-tokens" in _response_headers:
-            openai_headers["x-ratelimit-limit-tokens"] = _response_headers[
-                "x-ratelimit-limit-tokens"
-            ]
-        if "x-ratelimit-remaining-tokens" in _response_headers:
-            openai_headers["x-ratelimit-remaining-tokens"] = _response_headers[
-                "x-ratelimit-remaining-tokens"
-            ]
-        llm_response_headers = {
-            "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items()
-        }
-        if hidden_params is not None:
-            hidden_params["additional_headers"] = {
-                **llm_response_headers,
-                **openai_headers,
-            }
-        else:
-            hidden_params = {
-                "additional_headers": {**llm_response_headers, **openai_headers}
-            }
+        additional_headers = get_response_headers(_response_headers)
+        if hidden_params is None:
+            hidden_params = {}
+        hidden_params["additional_headers"] = additional_headers
 
     ### CHECK IF ERROR IN RESPONSE ### - openrouter returns these in the dictionary
     if (
         response_object is not None
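The merged headers land in hidden_params["additional_headers"], which surfaces on the response object. A usage sketch, not part of the diff (assumes a valid OPENAI_API_KEY and that the model name resolves for your account):

import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
additional_headers = response._hidden_params.get("additional_headers", {})
print(additional_headers.get("x-ratelimit-remaining-requests"))  # OpenAI-style key
print(additional_headers.get("llm_provider-x-ratelimit-remaining-requests"))  # prefixed copy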
@@ -34,6 +34,9 @@ from litellm import (
 from litellm.adapters.anthropic_adapter import anthropic_adapter
 from litellm.types.llms.anthropic import AnthropicResponse
+
+from litellm.llms.anthropic.common_utils import process_anthropic_headers
+from httpx import Headers
 
 
 def test_anthropic_completion_messages_translation():
     messages = [{"role": "user", "content": "Hey, how's it going?"}]
@@ -457,3 +460,70 @@ def test_anthropic_tool_calling_translation():
 
     assert len(translated_params["messages"]) > 0
     assert translated_params["messages"][0]["role"] == "user"
+
+
+def test_process_anthropic_headers_empty():
+    result = process_anthropic_headers({})
+    assert result == {}, "Expected empty dictionary for no input"
+
+
+def test_process_anthropic_headers_with_all_headers():
+    input_headers = Headers(
+        {
+            "anthropic-ratelimit-requests-limit": "100",
+            "anthropic-ratelimit-requests-remaining": "90",
+            "anthropic-ratelimit-tokens-limit": "10000",
+            "anthropic-ratelimit-tokens-remaining": "9000",
+            "other-header": "value",
+        }
+    )
+
+    expected_output = {
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-requests": "90",
+        "x-ratelimit-limit-tokens": "10000",
+        "x-ratelimit-remaining-tokens": "9000",
+        "llm_provider-anthropic-ratelimit-requests-limit": "100",
+        "llm_provider-anthropic-ratelimit-requests-remaining": "90",
+        "llm_provider-anthropic-ratelimit-tokens-limit": "10000",
+        "llm_provider-anthropic-ratelimit-tokens-remaining": "9000",
+        "llm_provider-other-header": "value",
+    }
+
+    result = process_anthropic_headers(input_headers)
+    assert result == expected_output, "Unexpected output for all Anthropic headers"
+
+
+def test_process_anthropic_headers_with_partial_headers():
+    input_headers = Headers(
+        {
+            "anthropic-ratelimit-requests-limit": "100",
+            "anthropic-ratelimit-tokens-remaining": "9000",
+            "other-header": "value",
+        }
+    )
+
+    expected_output = {
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-tokens": "9000",
+        "llm_provider-anthropic-ratelimit-requests-limit": "100",
+        "llm_provider-anthropic-ratelimit-tokens-remaining": "9000",
+        "llm_provider-other-header": "value",
+    }
+
+    result = process_anthropic_headers(input_headers)
+    assert result == expected_output, "Unexpected output for partial Anthropic headers"
+
+
+def test_process_anthropic_headers_with_no_matching_headers():
+    input_headers = Headers(
+        {"unrelated-header-1": "value1", "unrelated-header-2": "value2"}
+    )
+
+    expected_output = {
+        "llm_provider-unrelated-header-1": "value1",
+        "llm_provider-unrelated-header-2": "value2",
+    }
+
+    result = process_anthropic_headers(input_headers)
+    assert result == expected_output, "Unexpected output for non-matching headers"
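The expected outputs above fully pin down the translation contract. A minimal sketch of that contract, for illustration only (process_anthropic_headers_sketch and ANTHROPIC_TO_OPENAI are hypothetical names, not litellm's actual implementation):

from typing import Union

from httpx import Headers

# hypothetical mapping table; the real logic lives in
# litellm.llms.anthropic.common_utils.process_anthropic_headers
ANTHROPIC_TO_OPENAI = {
    "anthropic-ratelimit-requests-limit": "x-ratelimit-limit-requests",
    "anthropic-ratelimit-requests-remaining": "x-ratelimit-remaining-requests",
    "anthropic-ratelimit-tokens-limit": "x-ratelimit-limit-tokens",
    "anthropic-ratelimit-tokens-remaining": "x-ratelimit-remaining-tokens",
}


def process_anthropic_headers_sketch(headers: Union[Headers, dict]) -> dict:
    out: dict = {}
    for k, v in headers.items():
        if k in ANTHROPIC_TO_OPENAI:
            # translate Anthropic rate-limit headers to OpenAI-style keys
            out[ANTHROPIC_TO_OPENAI[k]] = v
        # every header is also forwarded under the llm_provider- prefix
        out[f"llm_provider-{k}"] = v
    return out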
96  tests/llm_translation/test_azure_openai.py  Normal file
@@ -0,0 +1,96 @@
import sys
import os

sys.path.insert(
    0, os.path.abspath("../../")
)  # Adds the parent directory to the system path

import pytest
from litellm.llms.AzureOpenAI.common_utils import process_azure_headers
from httpx import Headers


def test_process_azure_headers_empty():
    result = process_azure_headers({})
    assert result == {}, "Expected empty dictionary for no input"


def test_process_azure_headers_with_all_headers():
    input_headers = Headers(
        {
            "x-ratelimit-limit-requests": "100",
            "x-ratelimit-remaining-requests": "90",
            "x-ratelimit-limit-tokens": "10000",
            "x-ratelimit-remaining-tokens": "9000",
            "other-header": "value",
        }
    )

    expected_output = {
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-requests": "90",
        "x-ratelimit-limit-tokens": "10000",
        "x-ratelimit-remaining-tokens": "9000",
        "llm_provider-x-ratelimit-limit-requests": "100",
        "llm_provider-x-ratelimit-remaining-requests": "90",
        "llm_provider-x-ratelimit-limit-tokens": "10000",
        "llm_provider-x-ratelimit-remaining-tokens": "9000",
        "llm_provider-other-header": "value",
    }

    result = process_azure_headers(input_headers)
    assert result == expected_output, "Unexpected output for all Azure headers"


def test_process_azure_headers_with_partial_headers():
    input_headers = Headers(
        {
            "x-ratelimit-limit-requests": "100",
            "x-ratelimit-remaining-tokens": "9000",
            "other-header": "value",
        }
    )

    expected_output = {
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-tokens": "9000",
        "llm_provider-x-ratelimit-limit-requests": "100",
        "llm_provider-x-ratelimit-remaining-tokens": "9000",
        "llm_provider-other-header": "value",
    }

    result = process_azure_headers(input_headers)
    assert result == expected_output, "Unexpected output for partial Azure headers"


def test_process_azure_headers_with_no_matching_headers():
    input_headers = Headers(
        {"unrelated-header-1": "value1", "unrelated-header-2": "value2"}
    )

    expected_output = {
        "llm_provider-unrelated-header-1": "value1",
        "llm_provider-unrelated-header-2": "value2",
    }

    result = process_azure_headers(input_headers)
    assert result == expected_output, "Unexpected output for non-matching headers"


def test_process_azure_headers_with_dict_input():
    input_headers = {
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-requests": "90",
        "other-header": "value",
    }

    expected_output = {
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-requests": "90",
        "llm_provider-x-ratelimit-limit-requests": "100",
        "llm_provider-x-ratelimit-remaining-requests": "90",
        "llm_provider-other-header": "value",
    }

    result = process_azure_headers(input_headers)
    assert result == expected_output, "Unexpected output for dict input"
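The final test matters because callers hand process_azure_headers both httpx.Headers objects and plain dicts; httpx.Headers iterates with lower-cased keys while a dict does not, so both input shapes are exercised. A quick equivalence check (sketch, grounded only in the tests above):

from httpx import Headers
from litellm.llms.AzureOpenAI.common_utils import process_azure_headers

raw = {"x-ratelimit-limit-requests": "100", "other-header": "value"}
# both input shapes should produce the same merged output
assert process_azure_headers(Headers(raw)) == process_azure_headers(raw)

Given the sys.path setup at the top of the file, running something like `cd tests/llm_translation && pytest test_azure_openai.py -q` should exercise just these tests.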
@@ -0,0 +1,79 @@
import json
import os
import sys
from datetime import datetime

sys.path.insert(
    0, os.path.abspath("../../")
)  # Adds the parent directory to the system path

import litellm
import pytest

from litellm.litellm_core_utils.llm_response_utils.get_headers import (
    get_response_headers,
    _get_llm_provider_headers,
)


def test_get_response_headers_empty():
    result = get_response_headers()
    assert result == {}, "Expected empty dictionary for no input"


def test_get_response_headers_with_openai_headers():
    """
    OpenAI headers are forwarded as-is.
    Other headers are prefixed with llm_provider-.
    """
    input_headers = {
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-requests": "50",
        "x-ratelimit-limit-tokens": "1000",
        "x-ratelimit-remaining-tokens": "500",
        "other-header": "value",
    }
    expected_output = {
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-requests": "50",
        "x-ratelimit-limit-tokens": "1000",
        "x-ratelimit-remaining-tokens": "500",
        "llm_provider-x-ratelimit-limit-requests": "100",
        "llm_provider-x-ratelimit-remaining-requests": "50",
        "llm_provider-x-ratelimit-limit-tokens": "1000",
        "llm_provider-x-ratelimit-remaining-tokens": "500",
        "llm_provider-other-header": "value",
    }
    result = get_response_headers(input_headers)
    assert result == expected_output, "Unexpected output for OpenAI headers"


def test_get_response_headers_without_openai_headers():
    """
    Non-OpenAI headers are prefixed with llm_provider-.
    """
    input_headers = {"custom-header-1": "value1", "custom-header-2": "value2"}
    expected_output = {
        "llm_provider-custom-header-1": "value1",
        "llm_provider-custom-header-2": "value2",
    }
    result = get_response_headers(input_headers)
    assert result == expected_output, "Unexpected output for non-OpenAI headers"


def test_get_llm_provider_headers():
    """
    Headers already prefixed with llm_provider- are not prefixed again.
    """
    input_headers = {
        "header1": "value1",
        "header2": "value2",
        "llm_provider-existing": "existing_value",
    }
    expected_output = {
        "llm_provider-header1": "value1",
        "llm_provider-header2": "value2",
        "llm_provider-existing": "existing_value",
    }
    result = _get_llm_provider_headers(input_headers)
    assert result == expected_output, "Unexpected output for _get_llm_provider_headers"
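One property worth noting from test_get_llm_provider_headers: because keys containing llm_provider pass through untouched, the helper is idempotent. A quick check, for illustration:

from litellm.litellm_core_utils.llm_response_utils.get_headers import (
    _get_llm_provider_headers,
)

once = _get_llm_provider_headers({"retry-after": "1"})
# a second pass leaves already-prefixed keys unchanged
twice = _get_llm_provider_headers(once)
assert once == twice == {"llm_provider-retry-after": "1"}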
@@ -4569,6 +4569,13 @@ def test_completion_response_ratelimit_headers(model, stream):
     assert "x-ratelimit-remaining-requests" in additional_headers
     assert "x-ratelimit-remaining-tokens" in additional_headers
 
+    if model == "azure/chatgpt-v-2":
+        # Azure OpenAI header
+        assert "llm_provider-azureml-model-session" in additional_headers
+    if model == "claude-3-sonnet-20240229":
+        # anthropic header
+        assert "llm_provider-anthropic-ratelimit-requests-reset" in additional_headers
+
 
 def _openai_hallucinated_tool_call_mock_response(
     *args, **kwargs