(fix) pass through endpoints - run logging async + use thread pool executor for sync logging callbacks (#6907)

* run pass through logging async * fix use thread_pool_executor for pass through logging * test_pass_through_request_logging_failure_with_stream * fix anthropic pt logging test * test_pass_through_request_logging_failure
2024-11-25 22:52:05 -08:00 · 2024-11-25 22:52:05 -08:00 · 552c0dd7a4
commit 552c0dd7a4
parent d52aae4e82
6 changed files with 201 additions and 33 deletions
--- a/tests/pass_through_tests/test_anthropic_passthrough.py
+++ b/tests/pass_through_tests/test_anthropic_passthrough.py
@ -141,7 +141,9 @@ async def test_anthropic_basic_completion_with_headers():
                ), "Start time should be before end time"

                # Metadata assertions
-                assert log_entry["cache_hit"] == "False", "Cache should be off"
+                assert (
+                    str(log_entry["cache_hit"]).lower() != "true"
+                ), "Cache should be off"
                assert log_entry["request_tags"] == [
                    "test-tag-1",
                    "test-tag-2",
@ -251,7 +253,9 @@ async def test_anthropic_streaming_with_headers():
                ), "Start time should be before end time"

                # Metadata assertions
-                assert log_entry["cache_hit"] == "False", "Cache should be off"
+                assert (
+                    str(log_entry["cache_hit"]).lower() != "true"
+                ), "Cache should be off"
                assert log_entry["request_tags"] == [
                    "test-tag-stream-1",
                    "test-tag-stream-2",
--- a/tests/pass_through_unit_tests/test_pass_through_unit_tests.py
+++ b/tests/pass_through_unit_tests/test_pass_through_unit_tests.py
@ -3,11 +3,13 @@ import os
 import sys
 from datetime import datetime
 from unittest.mock import AsyncMock, Mock, patch, MagicMock
+from typing import Optional

 sys.path.insert(
    0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path

+import fastapi
 import httpx
 import pytest
 import litellm
@ -21,6 +23,9 @@ from litellm.proxy.pass_through_endpoints.streaming_handler import (
    PassThroughStreamingHandler,
 )

+from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
+    pass_through_request,
+)
 from fastapi import Request
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
@ -33,9 +38,21 @@ from litellm.proxy.pass_through_endpoints.types import PassthroughStandardLoggin
@pytest.fixture
 def mock_request():
    # Create a mock request with headers
+    class QueryParams:
+        def __init__(self):
+            self._dict = {}
+
    class MockRequest:
-        def __init__(self, headers=None):
+        def __init__(
+            self, headers=None, method="POST", request_body: Optional[dict] = None
+        ):
            self.headers = headers or {}
+            self.query_params = QueryParams()
+            self.method = method
+            self.request_body = request_body or {}
+
+        async def body(self) -> bytes:
+            return bytes(json.dumps(self.request_body), "utf-8")

    return MockRequest

@ -163,3 +180,143 @@ def test_init_kwargs_with_tags_in_header(mock_request, mock_user_api_key_dict):
    metadata = result["litellm_params"]["metadata"]
    print("metadata", metadata)
    assert metadata["tags"] == ["tag1", "tag2"]
+
+
+athropic_request_body = {
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 256,
+    "messages": [{"role": "user", "content": "Hello, world tell me 2 sentences "}],
+    "litellm_metadata": {"tags": ["hi", "hello"]},
+}
+
+
+@pytest.mark.asyncio
+async def test_pass_through_request_logging_failure(
+    mock_request, mock_user_api_key_dict
+):
+    """
+    Test that pass_through_request still returns a response even if logging raises an Exception
+    """
+
+    # Mock the logging handler to raise an error
+    async def mock_logging_failure(*args, **kwargs):
+        raise Exception("Logging failed!")
+
+    # Create a mock response
+    mock_response = AsyncMock()
+    mock_response.status_code = 200
+    mock_response.headers = {"content-type": "application/json"}
+
+    # Add mock content
+    mock_response._content = b'{"mock": "response"}'
+
+    async def mock_aread():
+        return mock_response._content
+
+    mock_response.aread = mock_aread
+
+    # Patch both the logging handler and the httpx client
+    with patch(
+        "litellm.proxy.pass_through_endpoints.pass_through_endpoints.PassThroughEndpointLogging.pass_through_async_success_handler",
+        new=mock_logging_failure,
+    ), patch(
+        "httpx.AsyncClient.send",
+        return_value=mock_response,
+    ), patch(
+        "httpx.AsyncClient.request",
+        return_value=mock_response,
+    ):
+        request = mock_request(
+            headers={}, method="POST", request_body=athropic_request_body
+        )
+        response = await pass_through_request(
+            request=request,
+            target="https://exampleopenaiendpoint-production.up.railway.app/v1/messages",
+            custom_headers={},
+            user_api_key_dict=mock_user_api_key_dict,
+        )
+
+        # Assert response was returned successfully despite logging failure
+        assert response.status_code == 200
+
+        # Verify we got the mock response content
+        if hasattr(response, "body"):
+            content = response.body
+        else:
+            content = await response.aread()
+
+        assert content == b'{"mock": "response"}'
+
+
+@pytest.mark.asyncio
+async def test_pass_through_request_logging_failure_with_stream(
+    mock_request, mock_user_api_key_dict
+):
+    """
+    Test that pass_through_request still returns a response even if logging raises an Exception
+    """
+
+    # Mock the logging handler to raise an error
+    async def mock_logging_failure(*args, **kwargs):
+        raise Exception("Logging failed!")
+
+    # Create a mock response
+    mock_response = AsyncMock()
+    mock_response.status_code = 200
+
+    # Add headers property to mock response
+    mock_response.headers = {
+        "content-type": "application/json",  # Not streaming
+    }
+
+    # Create mock chunks for streaming
+    mock_chunks = [b'{"chunk": 1}', b'{"chunk": 2}']
+    mock_response.body_iterator = AsyncMock()
+    mock_response.body_iterator.__aiter__.return_value = mock_chunks
+
+    # Add aread method to mock response
+    mock_response._content = b'{"mock": "response"}'
+
+    async def mock_aread():
+        return mock_response._content
+
+    mock_response.aread = mock_aread
+
+    # Patch both the logging handler and the httpx client
+    with patch(
+        "litellm.proxy.pass_through_endpoints.streaming_handler.PassThroughStreamingHandler._route_streaming_logging_to_handler",
+        new=mock_logging_failure,
+    ), patch(
+        "httpx.AsyncClient.send",
+        return_value=mock_response,
+    ), patch(
+        "httpx.AsyncClient.request",
+        return_value=mock_response,
+    ):
+        request = mock_request(
+            headers={}, method="POST", request_body=athropic_request_body
+        )
+        response = await pass_through_request(
+            request=request,
+            target="https://exampleopenaiendpoint-production.up.railway.app/v1/messages",
+            custom_headers={},
+            user_api_key_dict=mock_user_api_key_dict,
+        )
+
+        # Assert response was returned successfully despite logging failure
+        assert response.status_code == 200
+
+        # For non-streaming responses, we can access the content directly
+        if hasattr(response, "body"):
+            content = response.body
+        else:
+            # For streaming responses, we need to read the chunks
+            chunks = []
+            async for chunk in response.body_iterator:
+                chunks.append(chunk)
+            content = b"".join(chunks)
+
+        # Verify we got some response content
+        assert content is not None
+        if isinstance(content, bytes):
+            assert len(content) > 0