From e4e76c626114da94477f356a71aa70087c3f08e8 Mon Sep 17 00:00:00 2001
From: Rohit Panda <pandaro@amazon.com>
Date: Wed, 23 Apr 2025 17:42:53 +0000
Subject: [PATCH 1/5] fix: resolve Content-Length mismatch in pass-through
 endpoints

---
 .../proxy/pass_through_endpoints/pass_through_endpoints.py  | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
index 2fbedaeb22..d438d9ad6a 100644
--- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
+++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
@@ -402,13 +402,14 @@ class HttpPassThroughEndpointHelpers:
                 requested_query_params=requested_query_params,
             )
         else:
+            json_str = json.dumps(_parsed_body)  # Pre-serialize JSON to avoid Content-Length mismatch
             # Generic httpx method
             response = await async_client.request(
                 method=request.method,
                 url=url,
                 headers=headers,
                 params=requested_query_params,
-                json=_parsed_body,
+                data=json_str
             )
         return response
 
@@ -584,10 +585,11 @@ async def pass_through_request(  # noqa: PLR0915
             },
         )
         if stream:
+            json_str = json.dumps(_parsed_body)  # Pre-serialize JSON to avoid Content-Length mismatch
             req = async_client.build_request(
                 "POST",
                 url,
-                json=_parsed_body,
+                data=json_str,
                 params=requested_query_params,
                 headers=headers,
             )

From 1706fa33a369e39e1754fe6105039f95e7cf5939 Mon Sep 17 00:00:00 2001
From: Rohit Panda <pandaro@amazon.com>
Date: Wed, 23 Apr 2025 17:56:54 +0000
Subject: [PATCH 2/5] test: add Content-Length consistency tests for
 pass-through endpoints

---
 .../test_content_length_fix.py                | 110 ++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 tests/pass_through_unit_tests/test_content_length_fix.py

diff --git a/tests/pass_through_unit_tests/test_content_length_fix.py b/tests/pass_through_unit_tests/test_content_length_fix.py
new file mode 100644
index 0000000000..fd0e530665
--- /dev/null
+++ b/tests/pass_through_unit_tests/test_content_length_fix.py
@@ -0,0 +1,110 @@
+import json
+import pytest
+import httpx
+from unittest.mock import AsyncMock, MagicMock, patch
+
+class TestPassThroughContentLength:
+    """Tests for the Content-Length mismatch fix in pass-through endpoints."""
+
+    def test_content_length_consistency(self):
+        """Test that the Content-Length is consistent when using pre-serialized JSON."""
+        # Test data
+        test_data = {
+            "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:",
+            "max_tokens_to_sample": 50,
+            "temperature": 0.7,
+            "top_p": 0.9
+        }
+        
+        # Method 1: Using json parameter (what causes the issue)
+        request1 = httpx.Request(
+            method="POST",
+            url="https://example.com",
+            json=test_data
+        )
+        
+        # Method 2: Using data parameter with pre-serialized JSON (our fix)
+        json_str = json.dumps(test_data)
+        request2 = httpx.Request(
+            method="POST",
+            url="https://example.com",
+            content=json_str.encode(),
+            headers={"Content-Type": "application/json"}
+        )
+        
+        # Print the actual differences for verification
+        print(f"Method 1 (json): Content-Length={request1.headers.get('content-length')}, Actual={len(request1.content)}")
+        print(f"Method 2 (data): Content-Length={request2.headers.get('content-length')}, Actual={len(request2.content)}")
+        print(f"Method 1 body: {request1.content}")
+        print(f"Method 2 body: {request2.content}")
+        
+        # Assert that the Content-Length header matches the actual body length for our fix
+        assert len(request2.content) == int(request2.headers.get("content-length", 0))
+        
+        # Demonstrate the potential mismatch with the json parameter
+        # Note: This might not always fail depending on how httpx serializes JSON,
+        # but it demonstrates the potential issue
+        json_str_manual = json.dumps(test_data)
+        assert len(json_str_manual.encode()) != len(request1.content), "JSON serialization should be different"
+        
+    @pytest.mark.parametrize("use_data", [True, False])
+    def test_aws_sigv4_content_length_consistency(self, use_data):
+        """
+        Test that demonstrates how using data with pre-serialized JSON ensures
+        Content-Length consistency for AWS SigV4 authentication.
+        """
+        # Test data
+        test_data = {
+            "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:",
+            "max_tokens_to_sample": 50,
+            "temperature": 0.7,
+            "top_p": 0.9
+        }
+        
+        # Simulate SigV4 authentication process
+        # 1. Pre-serialize JSON for signing
+        json_str = json.dumps(test_data)
+        content_length_for_signing = len(json_str.encode())
+        
+        # 2. Create the actual request
+        if use_data:
+            # Our fix: Use pre-serialized JSON with data parameter
+            request = httpx.Request(
+                method="POST",
+                url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke",
+                content=json_str.encode(),
+                headers={
+                    "Content-Type": "application/json",
+                    "Content-Length": str(content_length_for_signing)
+                }
+            )
+        else:
+            # Original approach: Use json parameter (which causes the issue)
+            request = httpx.Request(
+                method="POST",
+                url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke",
+                json=test_data,
+                headers={
+                    "Content-Type": "application/json",
+                    "Content-Length": str(content_length_for_signing)
+                }
+            )
+        
+        # Check if Content-Length matches actual content length
+        actual_content_length = len(request.content)
+        expected_content_length = int(request.headers.get("content-length", 0))
+        
+        print(f"Use data: {use_data}")
+        print(f"Expected Content-Length: {expected_content_length}")
+        print(f"Actual content length: {actual_content_length}")
+        print(f"Content: {request.content}")
+        
+        if use_data:
+            # Our fix should ensure Content-Length matches
+            assert actual_content_length == expected_content_length, "Content-Length mismatch with data parameter"
+        else:
+            # The original approach might cause a mismatch
+            # Note: This might not always fail depending on how httpx serializes JSON
+            if actual_content_length != expected_content_length:
+                print("Content-Length mismatch detected with json parameter!")
+                print(f"This demonstrates the issue fixed by our PR.")

From e13a10bbdd152928f6045ebd8805138ff68b6659 Mon Sep 17 00:00:00 2001
From: Rohit Panda <pandaro@amazon.com>
Date: Wed, 23 Apr 2025 18:10:04 +0000
Subject: [PATCH 3/5] Fix linting issues

---
 .../proxy/pass_through_endpoints/pass_through_endpoints.py    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
index d438d9ad6a..43a84ecab0 100644
--- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
+++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
@@ -409,7 +409,7 @@ class HttpPassThroughEndpointHelpers:
                 url=url,
                 headers=headers,
                 params=requested_query_params,
-                data=json_str
+                data=json_str.encode()
             )
         return response
 
@@ -589,7 +589,7 @@ async def pass_through_request(  # noqa: PLR0915
             req = async_client.build_request(
                 "POST",
                 url,
-                data=json_str,
+                data=json_str.encode(),
                 params=requested_query_params,
                 headers=headers,
             )

From 32ea781632195c084400022b44017e572c263558 Mon Sep 17 00:00:00 2001
From: Rohit Panda <pandaro@amazon.com>
Date: Thu, 24 Apr 2025 09:07:36 +0000
Subject: [PATCH 4/5] fix: Move content length tests to
 test_pass_through_endpoints.py

---
 .../test_pass_through_endpoints.py            | 106 ++++++++++++++++++
 1 file changed, 106 insertions(+)

diff --git a/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py b/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py
index 43d4dd9cd8..c2823257ba 100644
--- a/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py
+++ b/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py
@@ -36,6 +36,112 @@ def test_is_multipart():
     assert HttpPassThroughEndpointHelpers.is_multipart(request) is False
 
 
+# Test content length consistency for pass-through endpoints
+def test_content_length_consistency():
+    """Pre-serialized JSON sent via ``content=`` keeps Content-Length and body in sync.
+
+    httpx's ``json=`` serializer is version-dependent (separators, ASCII escaping),
+    so its bytes are only required to decode to the same payload, never to have a
+    particular length.
+    """
+    test_data = {
+        "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:",
+        "max_tokens_to_sample": 50,
+        "temperature": 0.7,
+        "top_p": 0.9,
+    }
+
+    # Method 1: let httpx serialize the payload itself (the pre-fix behaviour).
+    request1 = httpx.Request(
+        method="POST",
+        url="https://example.com",
+        json=test_data,
+    )
+
+    # Method 2: serialize once and hand httpx the exact bytes (the fix).
+    body = json.dumps(test_data).encode()
+    request2 = httpx.Request(
+        method="POST",
+        url="https://example.com",
+        content=body,
+        headers={"Content-Type": "application/json"},
+    )
+
+    # The fix: the bytes on the wire are exactly the bytes we serialized, and the
+    # Content-Length header httpx derives from them matches.
+    assert request2.content == body
+    assert int(request2.headers["content-length"]) == len(body)
+
+    # Both requests describe their own body correctly and carry the same payload;
+    # whether the two byte strings are identical is up to the installed httpx.
+    assert int(request1.headers["content-length"]) == len(request1.content)
+    assert json.loads(request1.content) == test_data
+    assert json.loads(request2.content) == test_data
+
+
+@pytest.mark.parametrize("use_data", [True, False])
+def test_aws_sigv4_content_length_consistency(use_data):
+    """Check the request body against the bytes a SigV4 signer would have hashed.
+
+    ``use_data=True`` sends the pre-serialized bytes via ``content=`` (the fix);
+    ``use_data=False`` lets httpx re-serialize through ``json=`` (the old path).
+    Both cases pass a Content-Length header computed from the signed bytes.
+    """
+    test_data = {
+        "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:",
+        "max_tokens_to_sample": 50,
+        "temperature": 0.7,
+        "top_p": 0.9,
+    }
+    url = "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke"
+
+    # 1. Serialize once; these are the bytes the signature would cover.
+    signed_body = json.dumps(test_data).encode()
+    signed_headers = {
+        "Content-Type": "application/json",
+        "Content-Length": str(len(signed_body)),
+    }
+
+    # 2. Build the outgoing request.
+    if use_data:
+        # The fix: pass the signed bytes through untouched.
+        request = httpx.Request(
+            method="POST",
+            url=url,
+            content=signed_body,
+            headers=signed_headers,
+        )
+    else:
+        # The old path: httpx serializes the dict again with its own settings.
+        request = httpx.Request(
+            method="POST",
+            url=url,
+            json=test_data,
+            headers=signed_headers,
+        )
+
+    # 3. Compare the declared length with the body that would be sent.
+    actual_content_length = len(request.content)
+    expected_content_length = int(request.headers.get("content-length", 0))
+
+    if use_data:
+        # Byte-for-byte identity is what keeps a body signature valid; matching
+        # lengths follow from it.
+        assert request.content == signed_body, "body differs from the signed bytes"
+        assert (
+            actual_content_length == expected_content_length
+        ), "Content-Length mismatch with data parameter"
+    else:
+        # Which bytes httpx emits for ``json=`` depends on the installed version,
+        # so only version-independent facts are asserted: the payload decodes to
+        # the same object, and whenever the body is byte-identical to the signed
+        # bytes the declared length agrees with it as well.
+        assert json.loads(request.content) == test_data
+        body_is_signed_bytes = request.content == signed_body
+        if body_is_signed_bytes:
+            assert actual_content_length == expected_content_length
+
+
 # Test _build_request_files_from_upload_file
 @pytest.mark.asyncio
 async def test_build_request_files_from_upload_file():

From 939c2a38c70ed2ea4350eb8b1dbdf132b4cffd4a Mon Sep 17 00:00:00 2001
From: Rohit Panda <pandaro@amazon.com>
Date: Thu, 24 Apr 2025 09:28:56 +0000
Subject: [PATCH 5/5] fix: Move content length tests to
 test_pass_through_endpoints.py and fix linting errors

---
 .../pass_through_endpoints.py                 |   4 +-
 .../test_content_length_fix.py                | 110 ------------------
 2 files changed, 2 insertions(+), 112 deletions(-)
 delete mode 100644 tests/pass_through_unit_tests/test_content_length_fix.py

diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
index 43a84ecab0..2f31355d5f 100644
--- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
+++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
@@ -409,7 +409,7 @@ class HttpPassThroughEndpointHelpers:
                 url=url,
                 headers=headers,
                 params=requested_query_params,
-                data=json_str.encode()
+                content=json_str.encode()
             )
         return response
 
@@ -589,7 +589,7 @@ async def pass_through_request(  # noqa: PLR0915
             req = async_client.build_request(
                 "POST",
                 url,
-                data=json_str.encode(),
+                content=json_str.encode(),
                 params=requested_query_params,
                 headers=headers,
             )
diff --git a/tests/pass_through_unit_tests/test_content_length_fix.py b/tests/pass_through_unit_tests/test_content_length_fix.py
deleted file mode 100644
index fd0e530665..0000000000
--- a/tests/pass_through_unit_tests/test_content_length_fix.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import json
-import pytest
-import httpx
-from unittest.mock import AsyncMock, MagicMock, patch
-
-class TestPassThroughContentLength:
-    """Tests for the Content-Length mismatch fix in pass-through endpoints."""
-
-    def test_content_length_consistency(self):
-        """Test that the Content-Length is consistent when using pre-serialized JSON."""
-        # Test data
-        test_data = {
-            "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:",
-            "max_tokens_to_sample": 50,
-            "temperature": 0.7,
-            "top_p": 0.9
-        }
-        
-        # Method 1: Using json parameter (what causes the issue)
-        request1 = httpx.Request(
-            method="POST",
-            url="https://example.com",
-            json=test_data
-        )
-        
-        # Method 2: Using data parameter with pre-serialized JSON (our fix)
-        json_str = json.dumps(test_data)
-        request2 = httpx.Request(
-            method="POST",
-            url="https://example.com",
-            content=json_str.encode(),
-            headers={"Content-Type": "application/json"}
-        )
-        
-        # Print the actual differences for verification
-        print(f"Method 1 (json): Content-Length={request1.headers.get('content-length')}, Actual={len(request1.content)}")
-        print(f"Method 2 (data): Content-Length={request2.headers.get('content-length')}, Actual={len(request2.content)}")
-        print(f"Method 1 body: {request1.content}")
-        print(f"Method 2 body: {request2.content}")
-        
-        # Assert that the Content-Length header matches the actual body length for our fix
-        assert len(request2.content) == int(request2.headers.get("content-length", 0))
-        
-        # Demonstrate the potential mismatch with the json parameter
-        # Note: This might not always fail depending on how httpx serializes JSON,
-        # but it demonstrates the potential issue
-        json_str_manual = json.dumps(test_data)
-        assert len(json_str_manual.encode()) != len(request1.content), "JSON serialization should be different"
-        
-    @pytest.mark.parametrize("use_data", [True, False])
-    def test_aws_sigv4_content_length_consistency(self, use_data):
-        """
-        Test that demonstrates how using data with pre-serialized JSON ensures
-        Content-Length consistency for AWS SigV4 authentication.
-        """
-        # Test data
-        test_data = {
-            "prompt": "\n\nHuman: Tell me a short joke\n\nAssistant:",
-            "max_tokens_to_sample": 50,
-            "temperature": 0.7,
-            "top_p": 0.9
-        }
-        
-        # Simulate SigV4 authentication process
-        # 1. Pre-serialize JSON for signing
-        json_str = json.dumps(test_data)
-        content_length_for_signing = len(json_str.encode())
-        
-        # 2. Create the actual request
-        if use_data:
-            # Our fix: Use pre-serialized JSON with data parameter
-            request = httpx.Request(
-                method="POST",
-                url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke",
-                content=json_str.encode(),
-                headers={
-                    "Content-Type": "application/json",
-                    "Content-Length": str(content_length_for_signing)
-                }
-            )
-        else:
-            # Original approach: Use json parameter (which causes the issue)
-            request = httpx.Request(
-                method="POST",
-                url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke",
-                json=test_data,
-                headers={
-                    "Content-Type": "application/json",
-                    "Content-Length": str(content_length_for_signing)
-                }
-            )
-        
-        # Check if Content-Length matches actual content length
-        actual_content_length = len(request.content)
-        expected_content_length = int(request.headers.get("content-length", 0))
-        
-        print(f"Use data: {use_data}")
-        print(f"Expected Content-Length: {expected_content_length}")
-        print(f"Actual content length: {actual_content_length}")
-        print(f"Content: {request.content}")
-        
-        if use_data:
-            # Our fix should ensure Content-Length matches
-            assert actual_content_length == expected_content_length, "Content-Length mismatch with data parameter"
-        else:
-            # The original approach might cause a mismatch
-            # Note: This might not always fail depending on how httpx serializes JSON
-            if actual_content_length != expected_content_length:
-                print("Content-Length mismatch detected with json parameter!")
-                print(f"This demonstrates the issue fixed by our PR.")