forked from phoenix/litellm-mirror
Merge pull request #5646 from BerriAI/litellm_add_load_testing_logging
[Feat] Add Load Testing for Langsmith, and OTEL logging
Commit 9d2b09099f
3 changed files with 250 additions and 1 deletion
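
This PR adds two load tests that measure how much overhead a logging callback adds to litellm.acompletion: each test times batches of mocked completions with callbacks disabled, times the same batches with the Langsmith or OTEL callback enabled, and asserts that the average slowdown stays below 10%. A minimal sketch of that pattern (hypothetical helper names timed_batch and overhead_percent, and a much smaller batch than the real tests use) looks like this:

# Sketch only: the comparison pattern behind both new load tests, not the
# test code itself. mock_response keeps litellm from making real API calls.
import asyncio

import litellm


async def timed_batch(n: int = 10) -> float:
    """Time n mocked litellm.acompletion calls run concurrently."""
    start = asyncio.get_event_loop().time()
    tasks = [
        litellm.acompletion(
            model="openai/chatgpt-v-2",
            messages=[{"role": "user", "content": "This is a test"}],
            mock_response="hello from my load test",  # no real API call is made
        )
        for _ in range(n)
    ]
    await asyncio.gather(*tasks)
    return asyncio.get_event_loop().time() - start


def overhead_percent(callback: str) -> float:
    """Return the percentage slowdown caused by enabling a logging callback."""
    litellm.callbacks = []
    litellm.success_callback = []
    baseline = asyncio.run(timed_batch())

    litellm.success_callback = [callback]  # e.g. "langsmith"
    with_logging = asyncio.run(timed_batch())

    return abs(with_logging - baseline) / baseline * 100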
.circleci/config.yml
@@ -149,6 +149,33 @@ jobs:
       # Store test results
       - store_test_results:
           path: test-results
 
+  load_testing:
+    docker:
+      - image: cimg/python:3.11
+    working_directory: ~/project
+
+    steps:
+      - checkout
+      - run:
+          name: Install Dependencies
+          command: |
+            python -m pip install --upgrade pip
+            python -m pip install -r requirements.txt
+            pip install "pytest==7.3.1"
+            pip install "pytest-retry==1.6.3"
+            pip install "pytest-asyncio==0.21.1"
+      # Run pytest and generate JUnit XML report
+      - run:
+          name: Run tests
+          command: |
+            pwd
+            ls
+            python -m pytest -vv tests/load_tests -x -s -v --junitxml=test-results/junit.xml --durations=5
+          no_output_timeout: 120m
+
+      # Store test results
+      - store_test_results:
+          path: test-results
+
   installing_litellm_on_python:
     docker:
@@ -289,7 +316,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest -s -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests
+            python -m pytest -s -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests
           no_output_timeout: 120m
 
       # Store test results
@@ -607,10 +634,17 @@ workflows:
               only:
                 - main
                 - /litellm_.*/
+      - load_testing:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
       - publish_to_pypi:
           requires:
             - local_testing
             - build_and_test
+            - load_testing
             - proxy_log_to_otel_tests
             - proxy_pass_through_endpoint_tests
           filters:
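
To reproduce the new load_testing job locally, the same pytest invocation can be driven from Python. This is a sketch, assuming it is run from the repository root with the pytest packages installed exactly as the job above installs them:

# Sketch: mirror the load_testing CI step from Python (assumes the repo root
# as the working directory and test-results/ as a writable output directory).
import pytest

exit_code = pytest.main(
    [
        "-vv",
        "tests/load_tests",
        "-x",
        "-s",
        "--junitxml=test-results/junit.xml",
        "--durations=5",
    ]
)
raise SystemExit(exit_code)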
tests/load_tests/test_langsmith_load_test.py (new file, 116 lines)
@@ -0,0 +1,116 @@
import sys

import os

sys.path.insert(0, os.path.abspath("../.."))

import asyncio
import litellm
from litellm._logging import verbose_logger
import logging
import time
import pytest


def test_langsmith_logging_async():
    try:
        os.environ["LANGSMITH_API_KEY"] = "lsv2_anything"
        os.environ["LANGSMITH_PROJECT"] = "pr-b"
        os.environ["LANGSMITH_BASE_URL"] = (
            "https://exampleopenaiendpoint-production.up.railway.app"
        )

        percentage_diffs = []

        for run in range(3):
            print(f"\nRun {run + 1}:")

            # Test with empty success_callback
            litellm.success_callback = []
            litellm.callbacks = []
            litellm._async_success_callback = []
            litellm._async_failure_callback = []
            litellm.failure_callback = []
            start_time_empty_callback = asyncio.run(make_async_calls())
            print("Done with no callback test")

            # Test with langsmith callback
            print("Starting langsmith test")
            litellm.success_callback = ["langsmith"]
            start_time_langsmith = asyncio.run(make_async_calls())
            print("Done with langsmith test")

            # Compare times and calculate percentage difference
            print(f"Time with success_callback='langsmith': {start_time_langsmith}")
            print(f"Time with empty success_callback: {start_time_empty_callback}")

            percentage_diff = (
                abs(start_time_langsmith - start_time_empty_callback)
                / start_time_empty_callback
                * 100
            )
            percentage_diffs.append(percentage_diff)
            print(f"Performance difference: {percentage_diff:.2f}%")

        print("percentage_diffs", percentage_diffs)

        # Calculate average percentage difference
        avg_percentage_diff = sum(percentage_diffs) / len(percentage_diffs)
        print(f"\nAverage performance difference: {avg_percentage_diff:.2f}%")

        # Assert that the average difference is not more than 10%
        assert (
            avg_percentage_diff < 10
        ), f"Average performance difference of {avg_percentage_diff:.2f}% exceeds 10% threshold"

    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")


async def make_async_calls(metadata=None, **completion_kwargs):
    total_tasks = 300
    batch_size = 100
    total_time = 0

    for batch in range(3):
        tasks = [create_async_task() for _ in range(batch_size)]

        start_time = asyncio.get_event_loop().time()
        responses = await asyncio.gather(*tasks)

        for idx, response in enumerate(responses):
            print(f"Response from Task {batch * batch_size + idx + 1}: {response}")

        await asyncio.sleep(1)

        batch_time = asyncio.get_event_loop().time() - start_time
        total_time += batch_time

    return total_time


def create_async_task(**completion_kwargs):
    """
    Creates an async task for the litellm.acompletion function.
    This is just the task, but it is not run here.
    To run the task it must be awaited or used in other asyncio coroutine execution functions like asyncio.gather.
    Any kwargs passed to this function will be passed to the litellm.acompletion function.
    By default a standard set of arguments are used for the litellm.acompletion function.
    """
    completion_args = {
        "model": "openai/chatgpt-v-2",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test"}],
        "max_tokens": 5,
        "temperature": 0.7,
        "timeout": 5,
        "user": "langfuse_latency_test_user",
        "mock_response": "hello from my load test",
    }
    completion_args.update(completion_kwargs)
    return asyncio.create_task(litellm.acompletion(**completion_args))
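
The create_async_task docstring above notes that any kwargs are forwarded to litellm.acompletion. A hypothetical override, not part of this PR, would look like:

# Hypothetical usage of the create_async_task helper defined above: override
# the default message for one call, then await the scheduled task.
async def run_single_override():
    task = create_async_task(
        messages=[{"role": "user", "content": "override just this call"}],
    )
    return await task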
tests/load_tests/test_otel_load_test.py (new file, 99 lines)
@@ -0,0 +1,99 @@
import sys

import os

sys.path.insert(0, os.path.abspath("../.."))

import asyncio
import litellm
from litellm._logging import verbose_logger
import logging
import time
import pytest


def test_otel_logging_async():
    try:
        os.environ["OTEL_EXPORTER"] = "otlp_http"
        os.environ["OTEL_ENDPOINT"] = (
            "https://exampleopenaiendpoint-production.up.railway.app/traces"
        )
        os.environ["OTEL_HEADERS"] = "Authorization=K0BSwd"

        def single_run():
            litellm.callbacks = []
            start_time_empty = asyncio.run(make_async_calls())
            print(f"Time with empty callback: {start_time_empty}")

            litellm.callbacks = ["otel"]
            start_time_otel = asyncio.run(make_async_calls())
            print(f"Time with otel callback: {start_time_otel}")

            percent_diff = (
                abs(start_time_otel - start_time_empty) / start_time_empty * 100
            )
            print(f"Run performance difference: {percent_diff:.2f}%")
            return percent_diff

        percent_diffs = [single_run() for _ in range(3)]
        avg_percent_diff = sum(percent_diffs) / len(percent_diffs)

        print(f"Percentage differences: {percent_diffs}")
        print(f"Average performance difference: {avg_percent_diff:.2f}%")

        assert (
            avg_percent_diff < 10
        ), f"Average performance difference of {avg_percent_diff:.2f}% exceeds 10% threshold"

    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")


async def make_async_calls(metadata=None, **completion_kwargs):
    total_start_time = asyncio.get_event_loop().time()
    tasks = []

    async def create_and_run_task():
        task = create_async_task(**completion_kwargs)
        response = await task
        print(f"Response: {response}")

    for _ in range(3):  # Run for 10 seconds
        # Create 100 tasks
        tasks = []
        for _ in range(100):
            tasks.append(asyncio.create_task(create_and_run_task()))

        # Wait for any remaining tasks to complete
        await asyncio.gather(*tasks)

        await asyncio.sleep(1)

    # Calculate the total time taken
    total_time = asyncio.get_event_loop().time() - total_start_time

    return total_time


def create_async_task(**completion_kwargs):
    """
    Creates an async task for the litellm.acompletion function.
    This is just the task, but it is not run here.
    To run the task it must be awaited or used in other asyncio coroutine execution functions like asyncio.gather.
    Any kwargs passed to this function will be passed to the litellm.acompletion function.
    By default a standard set of arguments are used for the litellm.acompletion function.
    """
    completion_args = {
        "model": "openai/chatgpt-v-2",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test" * 100}],
        "max_tokens": 5,
        "temperature": 0.7,
        "timeout": 5,
        "user": "langfuse_latency_test_user",
        "mock_response": "Mock response",
    }
    completion_args.update(completion_kwargs)
    return asyncio.create_task(litellm.acompletion(**completion_args))