Merge pull request #5646 from BerriAI/litellm_add_load_testing_logging

[Feat] Add Load Testing for Langsmith and OTEL logging
Ishaan Jaff 2024-09-11 21:30:37 -07:00 committed by GitHub
commit 9d2b09099f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 250 additions and 1 deletion
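Both new test files follow the same pattern: run a batch of mocked `litellm.acompletion` calls with logging callbacks disabled, run the same batch with the callback enabled, and assert that the average latency overhead stays below 10%. A minimal standalone sketch of that comparison for the Langsmith case (the helper name `timed_batch` and the batch size are illustrative; the env values mirror the ones set in the test):

```python
import asyncio
import os

import litellm

# Dummy key, project, and mock endpoint, mirroring what the test sets
os.environ["LANGSMITH_API_KEY"] = "lsv2_anything"
os.environ["LANGSMITH_PROJECT"] = "pr-b"
os.environ["LANGSMITH_BASE_URL"] = (
    "https://exampleopenaiendpoint-production.up.railway.app"
)


async def timed_batch(n: int = 20) -> float:
    # Fire n mocked completions concurrently and return the elapsed wall-clock time
    start = asyncio.get_event_loop().time()
    await asyncio.gather(
        *[
            litellm.acompletion(
                model="openai/chatgpt-v-2",
                messages=[{"role": "user", "content": "This is a test"}],
                mock_response="hello from my load test",  # no real provider call
            )
            for _ in range(n)
        ]
    )
    return asyncio.get_event_loop().time() - start


async def main():
    litellm.success_callback = []  # baseline: no logging callback
    baseline = await timed_batch()

    litellm.success_callback = ["langsmith"]  # enable the Langsmith logger
    with_logging = await timed_batch()

    overhead = abs(with_logging - baseline) / baseline * 100
    print(f"Logging overhead: {overhead:.2f}%")  # the tests assert this stays < 10%


if __name__ == "__main__":
    asyncio.run(main())
```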

View file

@@ -149,6 +149,33 @@ jobs:
      # Store test results
      - store_test_results:
          path: test-results

  load_testing:
    docker:
      - image: cimg/python:3.11
    working_directory: ~/project
    steps:
      - checkout
      - run:
          name: Install Dependencies
          command: |
            python -m pip install --upgrade pip
            python -m pip install -r requirements.txt
            pip install "pytest==7.3.1"
            pip install "pytest-retry==1.6.3"
            pip install "pytest-asyncio==0.21.1"
      # Run pytest and generate JUnit XML report
      - run:
          name: Run tests
          command: |
            pwd
            ls
            python -m pytest -vv tests/load_tests -x -s -v --junitxml=test-results/junit.xml --durations=5
          no_output_timeout: 120m
      # Store test results
      - store_test_results:
          path: test-results

  installing_litellm_on_python:
    docker:
@@ -289,7 +316,7 @@ jobs:
          command: |
            pwd
            ls
            python -m pytest -s -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests
            python -m pytest -s -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests
          no_output_timeout: 120m
      # Store test results
@@ -607,10 +634,17 @@ workflows:
              only:
                - main
                - /litellm_.*/
      - load_testing:
          filters:
            branches:
              only:
                - main
                - /litellm_.*/
      - publish_to_pypi:
          requires:
            - local_testing
            - build_and_test
            - load_testing
            - proxy_log_to_otel_tests
            - proxy_pass_through_endpoint_tests
          filters:

View file

@@ -0,0 +1,116 @@
import sys
import os
sys.path.insert(0, os.path.abspath("../.."))
import asyncio
import litellm
from litellm._logging import verbose_logger
import logging
import time
import pytest


def test_langsmith_logging_async():
    try:
        os.environ["LANGSMITH_API_KEY"] = "lsv2_anything"
        os.environ["LANGSMITH_PROJECT"] = "pr-b"
        os.environ["LANGSMITH_BASE_URL"] = (
            "https://exampleopenaiendpoint-production.up.railway.app"
        )

        percentage_diffs = []

        for run in range(3):
            print(f"\nRun {run + 1}:")

            # Test with empty success_callback
            litellm.success_callback = []
            litellm.callbacks = []
            litellm._async_success_callback = []
            litellm._async_failure_callback = []
            litellm.failure_callback = []
            start_time_empty_callback = asyncio.run(make_async_calls())
            print("Done with no callback test")

            # Test with langsmith callback
            print("Starting langsmith test")
            litellm.success_callback = ["langsmith"]
            start_time_langsmith = asyncio.run(make_async_calls())
            print("Done with langsmith test")

            # Compare times and calculate percentage difference
            print(f"Time with success_callback='langsmith': {start_time_langsmith}")
            print(f"Time with empty success_callback: {start_time_empty_callback}")

            percentage_diff = (
                abs(start_time_langsmith - start_time_empty_callback)
                / start_time_empty_callback
                * 100
            )
            percentage_diffs.append(percentage_diff)
            print(f"Performance difference: {percentage_diff:.2f}%")

        print("percentage_diffs", percentage_diffs)

        # Calculate average percentage difference
        avg_percentage_diff = sum(percentage_diffs) / len(percentage_diffs)
        print(f"\nAverage performance difference: {avg_percentage_diff:.2f}%")

        # Assert that the average difference is not more than 10%
        assert (
            avg_percentage_diff < 10
        ), f"Average performance difference of {avg_percentage_diff:.2f}% exceeds 10% threshold"

    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")


async def make_async_calls(metadata=None, **completion_kwargs):
    # Run 3 batches of 100 concurrent mocked completions; total_time accumulates
    # each batch's wall-clock time (gather + prints + the 1-second pause).
    total_tasks = 300
    batch_size = 100
    total_time = 0

    for batch in range(3):
        tasks = [create_async_task() for _ in range(batch_size)]

        start_time = asyncio.get_event_loop().time()
        responses = await asyncio.gather(*tasks)

        for idx, response in enumerate(responses):
            print(f"Response from Task {batch * batch_size + idx + 1}: {response}")

        await asyncio.sleep(1)

        batch_time = asyncio.get_event_loop().time() - start_time
        total_time += batch_time

    return total_time


def create_async_task(**completion_kwargs):
    """
    Creates an async task for the litellm.acompletion function.
    This is just the task, but it is not run here.
    To run the task it must be awaited or used in other asyncio coroutine
    execution functions like asyncio.gather.
    Any kwargs passed to this function will be passed to litellm.acompletion.
    By default, a standard set of arguments is used for the litellm.acompletion call.
    """
    completion_args = {
        "model": "openai/chatgpt-v-2",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test"}],
        "max_tokens": 5,
        "temperature": 0.7,
        "timeout": 5,
        "user": "langfuse_latency_test_user",
        "mock_response": "hello from my load test",
    }
    completion_args.update(completion_kwargs)
    return asyncio.create_task(litellm.acompletion(**completion_args))
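As the docstring above notes, `create_async_task` only schedules the coroutine; it still has to be awaited. A minimal usage sketch, assuming it runs in the same module as `create_async_task` (the batch size and the `mock_response` override are illustrative):

```python
import asyncio


async def run_small_batch():
    # Schedule five mocked completions, overriding the default mock_response,
    # then await them together via asyncio.gather as the docstring suggests
    tasks = [create_async_task(mock_response=f"mock reply {i}") for i in range(5)]
    responses = await asyncio.gather(*tasks)
    for response in responses:
        print(response.choices[0].message.content)


if __name__ == "__main__":
    asyncio.run(run_small_batch())
```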

View file

@@ -0,0 +1,99 @@
import sys
import os
sys.path.insert(0, os.path.abspath("../.."))
import asyncio
import litellm
from litellm._logging import verbose_logger
import logging
import time
import pytest


def test_otel_logging_async():
    try:
        os.environ["OTEL_EXPORTER"] = "otlp_http"
        os.environ["OTEL_ENDPOINT"] = (
            "https://exampleopenaiendpoint-production.up.railway.app/traces"
        )
        os.environ["OTEL_HEADERS"] = "Authorization=K0BSwd"

        def single_run():
            litellm.callbacks = []
            start_time_empty = asyncio.run(make_async_calls())
            print(f"Time with empty callback: {start_time_empty}")

            litellm.callbacks = ["otel"]
            start_time_otel = asyncio.run(make_async_calls())
            print(f"Time with otel callback: {start_time_otel}")

            percent_diff = (
                abs(start_time_otel - start_time_empty) / start_time_empty * 100
            )
            print(f"Run performance difference: {percent_diff:.2f}%")
            return percent_diff

        percent_diffs = [single_run() for _ in range(3)]
        avg_percent_diff = sum(percent_diffs) / len(percent_diffs)

        print(f"Percentage differences: {percent_diffs}")
        print(f"Average performance difference: {avg_percent_diff:.2f}%")

        assert (
            avg_percent_diff < 10
        ), f"Average performance difference of {avg_percent_diff:.2f}% exceeds 10% threshold"

    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")


async def make_async_calls(metadata=None, **completion_kwargs):
    total_start_time = asyncio.get_event_loop().time()
    tasks = []

    async def create_and_run_task():
        task = create_async_task(**completion_kwargs)
        response = await task
        print(f"Response: {response}")

    for _ in range(3):  # run 3 batches of 100 concurrent requests
        # Create 100 tasks
        tasks = []
        for _ in range(100):
            tasks.append(asyncio.create_task(create_and_run_task()))

        # Wait for the batch to complete, then pause briefly
        await asyncio.gather(*tasks)
        await asyncio.sleep(1)

    # Calculate the total time taken
    total_time = asyncio.get_event_loop().time() - total_start_time
    return total_time


def create_async_task(**completion_kwargs):
    """
    Creates an async task for the litellm.acompletion function.
    This is just the task, but it is not run here.
    To run the task it must be awaited or used in other asyncio coroutine
    execution functions like asyncio.gather.
    Any kwargs passed to this function will be passed to litellm.acompletion.
    By default, a standard set of arguments is used for the litellm.acompletion call.
    """
    completion_args = {
        "model": "openai/chatgpt-v-2",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test" * 100}],
        "max_tokens": 5,
        "temperature": 0.7,
        "timeout": 5,
        "user": "langfuse_latency_test_user",
        "mock_response": "Mock response",
    }
    completion_args.update(completion_kwargs)
    return asyncio.create_task(litellm.acompletion(**completion_args))
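For concreteness, the 10% threshold used by both tests works like this: if a baseline batch takes 12.0s and the same batch takes 12.9s with the `otel` callback enabled, that run's difference is abs(12.9 - 12.0) / 12.0 * 100 = 7.5%, and only the average across the three runs is asserted against. A small sketch of that arithmetic (the timing values are invented for illustration):

```python
# Hypothetical per-run timings in seconds: (baseline, with the "otel" callback)
runs = [(12.0, 12.9), (11.5, 12.2), (12.3, 13.1)]

percent_diffs = [
    abs(with_otel - baseline) / baseline * 100 for baseline, with_otel in runs
]
avg_percent_diff = sum(percent_diffs) / len(percent_diffs)

print([f"{d:.2f}%" for d in percent_diffs])  # per-run overhead
print(f"average: {avg_percent_diff:.2f}%")   # must stay below 10% for the test to pass
assert avg_percent_diff < 10
```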