From e4e76c626114da94477f356a71aa70087c3f08e8 Mon Sep 17 00:00:00 2001 From: Rohit Panda Date: Wed, 23 Apr 2025 17:42:53 +0000 Subject: [PATCH 1/5] fix: resolve Content-Length mismatch in pass-through endpoints --- .../proxy/pass_through_endpoints/pass_through_endpoints.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py index 2fbedaeb22..d438d9ad6a 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py @@ -402,13 +402,14 @@ class HttpPassThroughEndpointHelpers: requested_query_params=requested_query_params, ) else: + json_str = json.dumps(_parsed_body) # Pre-serialize JSON to avoid Content-Length mismatch # Generic httpx method response = await async_client.request( method=request.method, url=url, headers=headers, params=requested_query_params, - json=_parsed_body, + data=json_str ) return response @@ -584,10 +585,11 @@ async def pass_through_request( # noqa: PLR0915 }, ) if stream: + json_str = json.dumps(_parsed_body) # Pre-serialize JSON to avoid Content-Length mismatch req = async_client.build_request( "POST", url, - json=_parsed_body, + data=json_str, params=requested_query_params, headers=headers, ) From 1706fa33a369e39e1754fe6105039f95e7cf5939 Mon Sep 17 00:00:00 2001 From: Rohit Panda Date: Wed, 23 Apr 2025 17:56:54 +0000 Subject: [PATCH 2/5] fix: resolve Content-Length mismatch in pass-through endpoints --- .../test_content_length_fix.py | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 tests/pass_through_unit_tests/test_content_length_fix.py diff --git a/tests/pass_through_unit_tests/test_content_length_fix.py b/tests/pass_through_unit_tests/test_content_length_fix.py new file mode 100644 index 0000000000..fd0e530665 --- /dev/null +++ b/tests/pass_through_unit_tests/test_content_length_fix.py @@ -0,0 +1,110 @@ +import json +import pytest +import httpx +from unittest.mock import AsyncMock, MagicMock, patch + +class TestPassThroughContentLength: + """Tests for the Content-Length mismatch fix in pass-through endpoints.""" + + def test_content_length_consistency(self): + """Test that the Content-Length is consistent when using pre-serialized JSON.""" + # Test data + test_data = { + "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:", + "max_tokens_to_sample": 50, + "temperature": 0.7, + "top_p": 0.9 + } + + # Method 1: Using json parameter (what causes the issue) + request1 = httpx.Request( + method="POST", + url="https://example.com", + json=test_data + ) + + # Method 2: Using data parameter with pre-serialized JSON (our fix) + json_str = json.dumps(test_data) + request2 = httpx.Request( + method="POST", + url="https://example.com", + content=json_str.encode(), + headers={"Content-Type": "application/json"} + ) + + # Print the actual differences for verification + print(f"Method 1 (json): Content-Length={request1.headers.get('content-length')}, Actual={len(request1.content)}") + print(f"Method 2 (data): Content-Length={request2.headers.get('content-length')}, Actual={len(request2.content)}") + print(f"Method 1 body: {request1.content}") + print(f"Method 2 body: {request2.content}") + + # Assert that the Content-Length header matches the actual body length for our fix + assert len(request2.content) == int(request2.headers.get("content-length", 0)) + + # Demonstrate the potential mismatch with the json parameter + # Note: This might not always fail depending on how httpx serializes JSON, + # but it demonstrates the potential issue + json_str_manual = json.dumps(test_data) + assert len(json_str_manual.encode()) != len(request1.content), "JSON serialization should be different" + + @pytest.mark.parametrize("use_data", [True, False]) + def test_aws_sigv4_content_length_consistency(self, use_data): + """ + Test that demonstrates how using data with pre-serialized JSON ensures + Content-Length consistency for AWS SigV4 authentication. + """ + # Test data + test_data = { + "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:", + "max_tokens_to_sample": 50, + "temperature": 0.7, + "top_p": 0.9 + } + + # Simulate SigV4 authentication process + # 1. Pre-serialize JSON for signing + json_str = json.dumps(test_data) + content_length_for_signing = len(json_str.encode()) + + # 2. Create the actual request + if use_data: + # Our fix: Use pre-serialized JSON with data parameter + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke", + content=json_str.encode(), + headers={ + "Content-Type": "application/json", + "Content-Length": str(content_length_for_signing) + } + ) + else: + # Original approach: Use json parameter (which causes the issue) + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke", + json=test_data, + headers={ + "Content-Type": "application/json", + "Content-Length": str(content_length_for_signing) + } + ) + + # Check if Content-Length matches actual content length + actual_content_length = len(request.content) + expected_content_length = int(request.headers.get("content-length", 0)) + + print(f"Use data: {use_data}") + print(f"Expected Content-Length: {expected_content_length}") + print(f"Actual content length: {actual_content_length}") + print(f"Content: {request.content}") + + if use_data: + # Our fix should ensure Content-Length matches + assert actual_content_length == expected_content_length, "Content-Length mismatch with data parameter" + else: + # The original approach might cause a mismatch + # Note: This might not always fail depending on how httpx serializes JSON + if actual_content_length != expected_content_length: + print("Content-Length mismatch detected with json parameter!") + print(f"This demonstrates the issue fixed by our PR.") From e13a10bbdd152928f6045ebd8805138ff68b6659 Mon Sep 17 00:00:00 2001 From: Rohit Panda Date: Wed, 23 Apr 2025 18:10:04 +0000 Subject: [PATCH 3/5] Fix linting issues --- .../proxy/pass_through_endpoints/pass_through_endpoints.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py index d438d9ad6a..43a84ecab0 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py @@ -409,7 +409,7 @@ class HttpPassThroughEndpointHelpers: url=url, headers=headers, params=requested_query_params, - data=json_str + data=json_str.encode() ) return response @@ -589,7 +589,7 @@ async def pass_through_request( # noqa: PLR0915 req = async_client.build_request( "POST", url, - data=json_str, + data=json_str.encode(), params=requested_query_params, headers=headers, ) From 32ea781632195c084400022b44017e572c263558 Mon Sep 17 00:00:00 2001 From: Rohit Panda Date: Thu, 24 Apr 2025 09:07:36 +0000 Subject: [PATCH 4/5] fix: Move content length tests to test_pass_through_endpoints.py --- .../test_pass_through_endpoints.py | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py b/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py index 43d4dd9cd8..c2823257ba 100644 --- a/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py +++ b/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py @@ -36,6 +36,112 @@ def test_is_multipart(): assert HttpPassThroughEndpointHelpers.is_multipart(request) is False +# Test content length consistency for pass-through endpoints +def test_content_length_consistency(): + """Test that the Content-Length is consistent when using pre-serialized JSON.""" + # Test data + test_data = { + "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:", + "max_tokens_to_sample": 50, + "temperature": 0.7, + "top_p": 0.9 + } + + # Method 1: Using json parameter (what causes the issue) + request1 = httpx.Request( + method="POST", + url="https://example.com", + json=test_data + ) + + # Method 2: Using data parameter with pre-serialized JSON (our fix) + json_str = json.dumps(test_data) + request2 = httpx.Request( + method="POST", + url="https://example.com", + content=json_str.encode(), + headers={"Content-Type": "application/json"} + ) + + # Print the actual differences for verification + print(f"Method 1 (json): Content-Length={request1.headers.get('content-length')}, Actual={len(request1.content)}") + print(f"Method 2 (data): Content-Length={request2.headers.get('content-length')}, Actual={len(request2.content)}") + print(f"Method 1 body: {request1.content}") + print(f"Method 2 body: {request2.content}") + + # Assert that the Content-Length header matches the actual body length for our fix + assert len(request2.content) == int(request2.headers.get("content-length", 0)) + + # Demonstrate the potential mismatch with the json parameter + # Note: This might not always fail depending on how httpx serializes JSON, + # but it demonstrates the potential issue + json_str_manual = json.dumps(test_data) + assert len(json_str_manual.encode()) != len(request1.content), "JSON serialization should be different" + + +@pytest.mark.parametrize("use_data", [True, False]) +def test_aws_sigv4_content_length_consistency(use_data): + """ + Test that demonstrates how using data with pre-serialized JSON ensures + Content-Length consistency for AWS SigV4 authentication. + """ + # Test data + test_data = { + "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:", + "max_tokens_to_sample": 50, + "temperature": 0.7, + "top_p": 0.9 + } + + # Simulate SigV4 authentication process + # 1. Pre-serialize JSON for signing + json_str = json.dumps(test_data) + content_length_for_signing = len(json_str.encode()) + + # 2. Create the actual request + if use_data: + # Our fix: Use pre-serialized JSON with data parameter + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke", + content=json_str.encode(), + headers={ + "Content-Type": "application/json", + "Content-Length": str(content_length_for_signing) + } + ) + else: + # Original approach: Use json parameter (which causes the issue) + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke", + json=test_data, + headers={ + "Content-Type": "application/json", + "Content-Length": str(content_length_for_signing) + } + ) + + # Check if Content-Length matches actual content length + actual_content_length = len(request.content) + expected_content_length = int(request.headers.get("content-length", 0)) + + print(f"Use data: {use_data}") + print(f"Expected Content-Length: {expected_content_length}") + print(f"Actual content length: {actual_content_length}") + print(f"Content: {request.content}") + + if use_data: + # Our fix should ensure Content-Length matches + assert actual_content_length == expected_content_length, "Content-Length mismatch with data parameter" + else: + # The original approach might cause a mismatch + # Note: This might not always fail depending on how httpx serializes JSON + if actual_content_length != expected_content_length: + print("Content-Length mismatch detected with json parameter!") + print(f"This demonstrates the issue fixed by our PR.") + + # Test _build_request_files_from_upload_file @pytest.mark.asyncio async def test_build_request_files_from_upload_file(): From 939c2a38c70ed2ea4350eb8b1dbdf132b4cffd4a Mon Sep 17 00:00:00 2001 From: Rohit Panda Date: Thu, 24 Apr 2025 09:28:56 +0000 Subject: [PATCH 5/5] fix: Move content length tests to test_pass_through_endpoints.py and fix linting errors --- .../pass_through_endpoints.py | 4 +- .../test_content_length_fix.py | 110 ------------------ 2 files changed, 2 insertions(+), 112 deletions(-) delete mode 100644 tests/pass_through_unit_tests/test_content_length_fix.py diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py index 43a84ecab0..2f31355d5f 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py @@ -409,7 +409,7 @@ class HttpPassThroughEndpointHelpers: url=url, headers=headers, params=requested_query_params, - data=json_str.encode() + content=json_str.encode() ) return response @@ -589,7 +589,7 @@ async def pass_through_request( # noqa: PLR0915 req = async_client.build_request( "POST", url, - data=json_str.encode(), + content=json_str.encode(), params=requested_query_params, headers=headers, ) diff --git a/tests/pass_through_unit_tests/test_content_length_fix.py b/tests/pass_through_unit_tests/test_content_length_fix.py deleted file mode 100644 index fd0e530665..0000000000 --- a/tests/pass_through_unit_tests/test_content_length_fix.py +++ /dev/null @@ -1,110 +0,0 @@ -import json -import pytest -import httpx -from unittest.mock import AsyncMock, MagicMock, patch - -class TestPassThroughContentLength: - """Tests for the Content-Length mismatch fix in pass-through endpoints.""" - - def test_content_length_consistency(self): - """Test that the Content-Length is consistent when using pre-serialized JSON.""" - # Test data - test_data = { - "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:", - "max_tokens_to_sample": 50, - "temperature": 0.7, - "top_p": 0.9 - } - - # Method 1: Using json parameter (what causes the issue) - request1 = httpx.Request( - method="POST", - url="https://example.com", - json=test_data - ) - - # Method 2: Using data parameter with pre-serialized JSON (our fix) - json_str = json.dumps(test_data) - request2 = httpx.Request( - method="POST", - url="https://example.com", - content=json_str.encode(), - headers={"Content-Type": "application/json"} - ) - - # Print the actual differences for verification - print(f"Method 1 (json): Content-Length={request1.headers.get('content-length')}, Actual={len(request1.content)}") - print(f"Method 2 (data): Content-Length={request2.headers.get('content-length')}, Actual={len(request2.content)}") - print(f"Method 1 body: {request1.content}") - print(f"Method 2 body: {request2.content}") - - # Assert that the Content-Length header matches the actual body length for our fix - assert len(request2.content) == int(request2.headers.get("content-length", 0)) - - # Demonstrate the potential mismatch with the json parameter - # Note: This might not always fail depending on how httpx serializes JSON, - # but it demonstrates the potential issue - json_str_manual = json.dumps(test_data) - assert len(json_str_manual.encode()) != len(request1.content), "JSON serialization should be different" - - @pytest.mark.parametrize("use_data", [True, False]) - def test_aws_sigv4_content_length_consistency(self, use_data): - """ - Test that demonstrates how using data with pre-serialized JSON ensures - Content-Length consistency for AWS SigV4 authentication. - """ - # Test data - test_data = { - "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:", - "max_tokens_to_sample": 50, - "temperature": 0.7, - "top_p": 0.9 - } - - # Simulate SigV4 authentication process - # 1. Pre-serialize JSON for signing - json_str = json.dumps(test_data) - content_length_for_signing = len(json_str.encode()) - - # 2. Create the actual request - if use_data: - # Our fix: Use pre-serialized JSON with data parameter - request = httpx.Request( - method="POST", - url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke", - content=json_str.encode(), - headers={ - "Content-Type": "application/json", - "Content-Length": str(content_length_for_signing) - } - ) - else: - # Original approach: Use json parameter (which causes the issue) - request = httpx.Request( - method="POST", - url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke", - json=test_data, - headers={ - "Content-Type": "application/json", - "Content-Length": str(content_length_for_signing) - } - ) - - # Check if Content-Length matches actual content length - actual_content_length = len(request.content) - expected_content_length = int(request.headers.get("content-length", 0)) - - print(f"Use data: {use_data}") - print(f"Expected Content-Length: {expected_content_length}") - print(f"Actual content length: {actual_content_length}") - print(f"Content: {request.content}") - - if use_data: - # Our fix should ensure Content-Length matches - assert actual_content_length == expected_content_length, "Content-Length mismatch with data parameter" - else: - # The original approach might cause a mismatch - # Note: This might not always fail depending on how httpx serializes JSON - if actual_content_length != expected_content_length: - print("Content-Length mismatch detected with json parameter!") - print(f"This demonstrates the issue fixed by our PR.")