From a3c70a099e3fdfcbc85d3e3994643211991c3d40 Mon Sep 17 00:00:00 2001 From: Sunny Wan Date: Tue, 4 Mar 2025 18:12:26 -0500 Subject: [PATCH 1/2] fixed memory leak and added test --- litellm/llms/openai/openai.py | 1 + tests/load_tests/test_memory_usage.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index 5465a24945..fdbe4d3475 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -460,6 +460,7 @@ class OpenAIChatCompletion(BaseLLM): else: headers = {} response = raw_response.parse() + raw_response.http_response.close() return headers, response except Exception as e: if raw_response is not None: diff --git a/tests/load_tests/test_memory_usage.py b/tests/load_tests/test_memory_usage.py index f273865a29..63c54f2052 100644 --- a/tests/load_tests/test_memory_usage.py +++ b/tests/load_tests/test_memory_usage.py @@ -83,6 +83,13 @@ async def make_text_completion_request(): api_base="https://exampleopenaiendpoint-production.up.railway.app/", ) +def make_streaming_completion_request(): + return litellm.acompletion( + model="openai/gpt-4o", + messages=[{"role": "user", "content": "Test message for memory usage"}], + stream=True, + ) + @pytest.mark.asyncio @pytest.mark.skip( @@ -102,6 +109,20 @@ async def test_atext_completion_memory(): await run_memory_test(make_text_completion_request, "atext_completion") +@pytest.mark.skip( + reason="This test is too slow to run on every commit. We can use this after nightly release" +) +def test_streaming_completion_memory(): + """Test memory usage for streaming litellm.acompletion""" + run_memory_test(make_streaming_completion_request,"completion") + +@pytest.mark.skip( + reason="This test is too slow to run on every commit. 
We can use this after nightly release" ) def test_streaming_acompletion_memory(): """Test memory usage for streaming litellm.acompletion""" run_memory_test(make_streaming_completion_request,"acompletion") + litellm_router = Router( model_list=[ { From 2ab4fc96e460dd3fc10fda27711d063b00345406 Mon Sep 17 00:00:00 2001 From: Sunny Wan Date: Wed, 5 Mar 2025 17:21:16 -0500 Subject: [PATCH 2/2] removed close Can't just close() the response because you still need to stream the results --- litellm/llms/openai/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index fdbe4d3475..b5d24ad605 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -460,7 +460,7 @@ class OpenAIChatCompletion(BaseLLM): else: headers = {} response = raw_response.parse() - raw_response.http_response.close() + # raw_response.http_response.close() return headers, response except Exception as e: if raw_response is not None: