Merge 2ab4fc96e4 into b82af5b826

2025-04-25 02:34:29 +00:00 · 2025-04-24 15:25:27 +08:00 · 2025-04-24 15:25:27 +08:00 · 512860bb8d
commit 512860bb8d
parent b82af5b826 2ab4fc96e4
2 changed files with 22 additions and 0 deletions
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@ -459,6 +459,7 @@ class OpenAIChatCompletion(BaseLLM, BaseOpenAILLM):
            else:
                headers = {}
            response = raw_response.parse()
            # raw_response.http_response.close()
            return headers, response
        except Exception as e:
            if raw_response is not None:
--- a/tests/load_tests/test_memory_usage.py
+++ b/tests/load_tests/test_memory_usage.py
@ -83,6 +83,13 @@ async def make_text_completion_request():
        api_base="https://exampleopenaiendpoint-production.up.railway.app/",
    )
 def make_streaming_completion_request():
    """Start one streaming chat-completion request for the memory test.

    This is a plain function: it calls ``litellm.acompletion`` with
    ``stream=True`` and returns that call's result without awaiting it.
    """
    # NOTE(review): the text-completion helper just above passes an explicit
    # api_base; none is passed here -- confirm that is intentional.
    chat_messages = [{"role": "user", "content": "Test message for memory usage"}]
    pending_request = litellm.acompletion(
        model="openai/gpt-4o",
        messages=chat_messages,
        stream=True,
    )
    return pending_request
@pytest.mark.asyncio
@pytest.mark.skip(
@ -102,6 +109,20 @@ async def test_atext_completion_memory():
    await run_memory_test(make_text_completion_request, "atext_completion")
@pytest.mark.asyncio
@pytest.mark.skip(
    reason="This test is too slow to run on every commit. We can use this after nightly release"
)
async def test_streaming_completion_memory():
    """Test memory usage for streaming litellm.acompletion.

    Runs ``run_memory_test`` with the streaming request factory and the
    label ``"completion"``.
    """
    # run_memory_test is awaited by the other memory tests in this file;
    # calling it without ``await`` would only create a coroutine object and
    # the test would pass without ever issuing a request.
    await run_memory_test(make_streaming_completion_request, "completion")
@pytest.mark.asyncio
@pytest.mark.skip(
    reason="This test is too slow to run on every commit. We can use this after nightly release"
)
async def test_streaming_acompletion_memory():
    """Test memory usage for streaming litellm.acompletion.

    Runs ``run_memory_test`` with the streaming request factory and the
    label ``"acompletion"``.
    """
    # NOTE(review): this uses the same request factory as
    # test_streaming_completion_memory; only the label differs -- confirm
    # whether a distinct request was intended.
    # The call must be awaited: run_memory_test is awaited elsewhere in this
    # file, so a bare call would never execute the measurement.
    await run_memory_test(make_streaming_completion_request, "acompletion")
 litellm_router = Router(
    model_list=[
        {