mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
(test) test latency added with langfuse call
This commit is contained in:
parent
cb40f58cd3
commit
2f429f37b7
2 changed files with 48 additions and 65 deletions
|
@ -99,36 +99,68 @@ def pre_langfuse_setup():
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_async():
    """Measure the latency overhead added by langfuse success-callback logging.

    Runs 5 async mocked completions with an empty ``success_callback``, then 5
    more with ``success_callback=["langfuse"]``, and asserts that the total
    wall-time difference stays under 1 second.
    """
    # this tests time added to make langfuse logging calls, vs just acompletion calls
    try:
        pre_langfuse_setup()
        litellm.set_verbose = True

        # Make 5 calls with an empty success_callback
        litellm.success_callback = []
        start_time_empty_callback = asyncio.run(make_async_calls())
        print("done with no callback test")

        print("starting langfuse test")
        # Make 5 calls with success_callback set to "langfuse"
        litellm.success_callback = ["langfuse"]
        start_time_langfuse = asyncio.run(make_async_calls())
        print("done with langfuse test")

        # Compare the time for both scenarios
        print(f"Time taken with success_callback='langfuse': {start_time_langfuse}")
        print(f"Time taken with empty success_callback: {start_time_empty_callback}")

        # assert the diff is not more than 1 second - this was 5 seconds before the fix
        assert abs(start_time_langfuse - start_time_empty_callback) < 1
    except litellm.Timeout:
        # Timeouts against the (mocked) endpoint are tolerated, not a test failure.
        # (was `as e` with the variable unused — binding removed)
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")
|
async def make_async_calls():
    """Fire 5 concurrent mocked ``litellm.acompletion`` calls and return the
    total wall time (in seconds) spent awaiting them.

    ``mock_response`` means no real provider request is made, so the measured
    time is dominated by litellm-internal work (e.g. logging callbacks).
    """
    tasks = []
    for _ in range(5):
        task = asyncio.create_task(
            litellm.acompletion(
                model="azure/chatgpt-v-2",
                messages=[{"role": "user", "content": "This is a test"}],
                max_tokens=5,
                temperature=0.7,
                timeout=5,
                user="test_user",
                mock_response="It's simple to use and easy to get started",
            )
        )
        tasks.append(task)

    # Measure the start time before running the tasks.
    # get_running_loop() replaces the deprecated get_event_loop() call inside a
    # coroutine (Python 3.10+ DeprecationWarning); behavior is identical here.
    start_time = asyncio.get_running_loop().time()

    # Wait for all tasks to complete
    responses = await asyncio.gather(*tasks)

    # Print the responses when tasks return
    for idx, response in enumerate(responses):
        print(f"Response from Task {idx + 1}: {response}")

    # Calculate the total time taken
    total_time = asyncio.get_running_loop().time() - start_time

    return total_time
# def test_langfuse_logging_async_text_completion():
|
# def test_langfuse_logging_async_text_completion():
|
||||||
# try:
|
# try:
|
||||||
# pre_langfuse_setup()
|
# pre_langfuse_setup()
|
||||||
|
|
|
@ -1,49 +0,0 @@
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import io, asyncio
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
|
||||||
sys.path.insert(0, os.path.abspath("../.."))
|
|
||||||
|
|
||||||
from litellm import completion
|
|
||||||
import litellm
|
|
||||||
|
|
||||||
litellm.num_retries = 3
|
|
||||||
import time
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
|
|
||||||
async def custom_callback(
    kwargs,  # kwargs to completion
    completion_response,  # response from completion
    start_time,
    end_time,  # start/end time
):
    # Your custom code here
    # Debug callback: print everything litellm hands to a success callback,
    # then sleep 1s to simulate a slow callback (used to see how callback
    # latency affects overall completion time).
    print("LITELLM: in custom callback function")
    print("kwargs", kwargs)
    print("completion_response", completion_response)
    print("start_time", start_time)
    print("end_time", end_time)
    time.sleep(1)

    return
|
|
||||||
|
|
||||||
def test_time_to_run_10_completions():
    # NOTE(review): despite the name and the print below, only ONE acompletion
    # is awaited here — presumably trimmed down from a 10-call loop; verify intent.
    litellm.callbacks = [custom_callback]
    start = time.time()

    asyncio.run(
        litellm.acompletion(
            model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hello"}]
        )
    )
    end = time.time()
    print(f"Time to run 10 completions: {end - start}")


test_time_to_run_10_completions()
|
|
Loading…
Add table
Add a link
Reference in a new issue