From 2f429f37b7f8795b22c9869511a98d2203cefece Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 19 Jan 2024 11:28:10 -0800
Subject: [PATCH] (test) test latency added with langfuse call

---
 litellm/tests/test_alangfuse.py       | 64 ++++++++++++++++++++-------
 litellm/tests/test_async_callbacks.py | 49 --------------------
 2 files changed, 48 insertions(+), 65 deletions(-)
 delete mode 100644 litellm/tests/test_async_callbacks.py

diff --git a/litellm/tests/test_alangfuse.py b/litellm/tests/test_alangfuse.py
index 6a10528dfc..e87420433c 100644
--- a/litellm/tests/test_alangfuse.py
+++ b/litellm/tests/test_alangfuse.py
@@ -99,36 +99,68 @@ def pre_langfuse_setup():
     return
 
 
-@pytest.mark.skip(reason="beta test - checking langfuse output")
 def test_langfuse_logging_async():
+    # this tests time added to make langfuse logging calls, vs just acompletion calls
     try:
         pre_langfuse_setup()
         litellm.set_verbose = True
+
+        # Make 5 calls with an empty success_callback
+        litellm.success_callback = []
+        start_time_empty_callback = asyncio.run(make_async_calls())
+        print("done with no callback test")
+
+        print("starting langfuse test")
+        # Make 5 calls with success_callback set to "langfuse"
         litellm.success_callback = ["langfuse"]
+        start_time_langfuse = asyncio.run(make_async_calls())
+        print("done with langfuse test")
 
-        async def _test_langfuse():
-            response = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
-                messages=[{"role": "user", "content": "This is a test"}],
-                max_tokens=100,
-                temperature=0.7,
-                timeout=5,
-                user="test_user",
-            )
-            await asyncio.sleep(1)
-            return response
+        # Compare the time for both scenarios
+        print(f"Time taken with success_callback='langfuse': {start_time_langfuse}")
+        print(f"Time taken with empty success_callback: {start_time_empty_callback}")
 
-        response = asyncio.run(_test_langfuse())
-        print(f"response: {response}")
+        # assert the diff is not more than 1 second - this was 5 seconds before the fix
+        assert abs(start_time_langfuse - start_time_empty_callback) < 1
 
-        # # check langfuse.log to see if there was a failed response
-        search_logs("langfuse.log")
     except litellm.Timeout as e:
         pass
     except Exception as e:
         pytest.fail(f"An exception occurred - {e}")
 
 
+async def make_async_calls():
+    tasks = []
+    for _ in range(5):
+        task = asyncio.create_task(
+            litellm.acompletion(
+                model="azure/chatgpt-v-2",
+                messages=[{"role": "user", "content": "This is a test"}],
+                max_tokens=5,
+                temperature=0.7,
+                timeout=5,
+                user="test_user",
+                mock_response="It's simple to use and easy to get started",
+            )
+        )
+        tasks.append(task)
+
+    # Measure the start time before running the tasks
+    start_time = asyncio.get_event_loop().time()
+
+    # Wait for all tasks to complete
+    responses = await asyncio.gather(*tasks)
+
+    # Print the responses when tasks return
+    for idx, response in enumerate(responses):
+        print(f"Response from Task {idx + 1}: {response}")
+
+    # Calculate the total time taken
+    total_time = asyncio.get_event_loop().time() - start_time
+
+    return total_time
+
+
 # def test_langfuse_logging_async_text_completion():
 #     try:
 #         pre_langfuse_setup()
diff --git a/litellm/tests/test_async_callbacks.py b/litellm/tests/test_async_callbacks.py
deleted file mode 100644
index 1d586070d8..0000000000
--- a/litellm/tests/test_async_callbacks.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import json
-import sys
-import os
-import io, asyncio
-
-import logging
-
-logging.basicConfig(level=logging.DEBUG)
-sys.path.insert(0, os.path.abspath("../.."))
-
-from litellm import completion
-import litellm
-
-litellm.num_retries = 3
-import time
-import pytest
-
-
-async def custom_callback(
-    kwargs,  # kwargs to completion
-    completion_response,  # response from completion
-    start_time,
-    end_time,  # start/end time
-):
-    # Your custom code here
-    print("LITELLM: in custom callback function")
-    print("kwargs", kwargs)
-    print("completion_response", completion_response)
-    print("start_time", start_time)
-    print("end_time", end_time)
-    time.sleep(1)
-
-    return
-
-
-def test_time_to_run_10_completions():
-    litellm.callbacks = [custom_callback]
-    start = time.time()
-
-    asyncio.run(
-        litellm.acompletion(
-            model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hello"}]
-        )
-    )
-    end = time.time()
-    print(f"Time to run 10 completions: {end - start}")
-
-
-test_time_to_run_10_completions()
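
Note: the pattern the new test relies on — run a fixed batch of concurrent coroutines twice, once with callbacks disabled and once enabled, and compare elapsed wall-clock time — can be sketched independently of litellm. The sketch below is illustrative only: fake_completion, run_batch, and the 0.1 s delay are hypothetical stand-ins (fake_completion plays the role that acompletion with mock_response plays in the patch), and time.perf_counter() is used instead of the patch's asyncio.get_event_loop().time() so the measurement does not depend on a particular event loop.

import asyncio
import time


async def fake_completion(delay: float = 0.1) -> str:
    # hypothetical stand-in for litellm.acompletion(..., mock_response=...)
    await asyncio.sleep(delay)
    return "mock response"


async def run_batch(n: int = 5) -> float:
    # run n calls concurrently and return the total elapsed wall-clock time
    start = time.perf_counter()
    await asyncio.gather(*(fake_completion() for _ in range(n)))
    return time.perf_counter() - start


if __name__ == "__main__":
    time_empty_callback = asyncio.run(run_batch())  # callbacks disabled
    time_with_callback = asyncio.run(run_batch())  # callbacks enabled in the real test
    print(f"baseline: {time_empty_callback:.3f}s, with callback: {time_with_callback:.3f}s")
    assert abs(time_with_callback - time_empty_callback) < 1

In the actual test the two batches differ only in litellm.success_callback, so the difference between the two timings approximates the overhead of the logging calls, which the 1-second assertion bounds.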