mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
(test) test latency added with langfuse call
This commit is contained in:
parent
cb40f58cd3
commit
2f429f37b7
2 changed files with 48 additions and 65 deletions
|
@ -99,36 +99,68 @@ def pre_langfuse_setup():
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_async():
    """Measure the latency overhead added by langfuse success-callback logging.

    Runs 5 async mocked completions with an empty ``success_callback``, then 5
    more with ``success_callback=["langfuse"]``, and asserts that the total
    wall-time difference stays under 1 second.
    """
    # this tests time added to make langfuse logging calls, vs just acompletion calls
    try:
        pre_langfuse_setup()
        litellm.set_verbose = True

        # Make 5 calls with an empty success_callback
        litellm.success_callback = []
        start_time_empty_callback = asyncio.run(make_async_calls())
        print("done with no callback test")

        print("starting langfuse test")
        # Make 5 calls with success_callback set to "langfuse"
        litellm.success_callback = ["langfuse"]
        start_time_langfuse = asyncio.run(make_async_calls())
        print("done with langfuse test")

        # Compare the time for both scenarios
        print(f"Time taken with success_callback='langfuse': {start_time_langfuse}")
        print(f"Time taken with empty success_callback: {start_time_empty_callback}")

        # assert the diff is not more than 1 second - this was 5 seconds before the fix
        assert abs(start_time_langfuse - start_time_empty_callback) < 1
    except litellm.Timeout:
        # Timeouts against the (mocked) endpoint are tolerated, not a test failure.
        # (was `as e` with the variable unused — binding removed)
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")
|
async def make_async_calls():
    """Fire 5 concurrent mocked ``litellm.acompletion`` calls and return the
    total wall time (in seconds) spent awaiting them.

    ``mock_response`` means no real provider request is made, so the measured
    time is dominated by litellm-internal work (e.g. logging callbacks).
    """
    tasks = []
    for _ in range(5):
        task = asyncio.create_task(
            litellm.acompletion(
                model="azure/chatgpt-v-2",
                messages=[{"role": "user", "content": "This is a test"}],
                max_tokens=5,
                temperature=0.7,
                timeout=5,
                user="test_user",
                mock_response="It's simple to use and easy to get started",
            )
        )
        tasks.append(task)

    # Measure the start time before running the tasks.
    # get_running_loop() replaces the deprecated get_event_loop() call inside a
    # coroutine (Python 3.10+ DeprecationWarning); behavior is identical here.
    start_time = asyncio.get_running_loop().time()

    # Wait for all tasks to complete
    responses = await asyncio.gather(*tasks)

    # Print the responses when tasks return
    for idx, response in enumerate(responses):
        print(f"Response from Task {idx + 1}: {response}")

    # Calculate the total time taken
    total_time = asyncio.get_running_loop().time() - start_time

    return total_time
# def test_langfuse_logging_async_text_completion():
|
# def test_langfuse_logging_async_text_completion():
|
||||||
# try:
|
# try:
|
||||||
# pre_langfuse_setup()
|
# pre_langfuse_setup()
|
||||||
|
|
|
@ -1,49 +0,0 @@
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import io, asyncio
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
|
||||||
sys.path.insert(0, os.path.abspath("../.."))
|
|
||||||
|
|
||||||
from litellm import completion
|
|
||||||
import litellm
|
|
||||||
|
|
||||||
litellm.num_retries = 3
|
|
||||||
import time
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
|
|
||||||
async def custom_callback(
    kwargs,  # kwargs to completion
    completion_response,  # response from completion
    start_time,
    end_time,  # start/end time
):
    # Your custom code here
    # Debug callback: print everything litellm hands to a success callback,
    # then sleep 1s to simulate a slow callback (used to see how callback
    # latency affects overall completion time).
    print("LITELLM: in custom callback function")
    print("kwargs", kwargs)
    print("completion_response", completion_response)
    print("start_time", start_time)
    print("end_time", end_time)
    time.sleep(1)

    return
|
|
||||||
|
|
||||||
def test_time_to_run_10_completions():
    # NOTE(review): despite the name and the print below, only ONE acompletion
    # is awaited here — presumably trimmed down from a 10-call loop; verify intent.
    litellm.callbacks = [custom_callback]
    start = time.time()

    asyncio.run(
        litellm.acompletion(
            model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hello"}]
        )
    )
    end = time.time()
    print(f"Time to run 10 completions: {end - start}")


test_time_to_run_10_completions()
|
|
Loading…
Add table
Add a link
Reference in a new issue