forked from phoenix/litellm-mirror
Merge pull request #5646 from BerriAI/litellm_add_load_testing_logging
[Feat] Add Load Testing for Langsmith, and OTEL logging
Commit 9d2b09099f
3 changed files with 250 additions and 1 deletion
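
This PR adds two load tests that measure how much overhead a logging callback adds to litellm.acompletion: each test times batches of mocked completions with callbacks disabled, times the same batches with the Langsmith or OTEL callback enabled, and asserts that the average slowdown stays below 10%. A minimal sketch of that pattern (hypothetical helper names timed_batch and overhead_percent, and a much smaller batch than the real tests use) looks like this:

# Sketch only: the comparison pattern behind both new load tests, not the
# test code itself. mock_response keeps litellm from making real API calls.
import asyncio

import litellm


async def timed_batch(n: int = 10) -> float:
    """Time n mocked litellm.acompletion calls run concurrently."""
    start = asyncio.get_event_loop().time()
    tasks = [
        litellm.acompletion(
            model="openai/chatgpt-v-2",
            messages=[{"role": "user", "content": "This is a test"}],
            mock_response="hello from my load test",  # no real API call is made
        )
        for _ in range(n)
    ]
    await asyncio.gather(*tasks)
    return asyncio.get_event_loop().time() - start


def overhead_percent(callback: str) -> float:
    """Return the percentage slowdown caused by enabling a logging callback."""
    litellm.callbacks = []
    litellm.success_callback = []
    baseline = asyncio.run(timed_batch())

    litellm.success_callback = [callback]  # e.g. "langsmith"
    with_logging = asyncio.run(timed_batch())

    return abs(with_logging - baseline) / baseline * 100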
.circleci/config.yml
@@ -149,6 +149,33 @@ jobs:
       # Store test results
       - store_test_results:
           path: test-results
 
+  load_testing:
+    docker:
+      - image: cimg/python:3.11
+    working_directory: ~/project
+
+    steps:
+      - checkout
+      - run:
+          name: Install Dependencies
+          command: |
+            python -m pip install --upgrade pip
+            python -m pip install -r requirements.txt
+            pip install "pytest==7.3.1"
+            pip install "pytest-retry==1.6.3"
+            pip install "pytest-asyncio==0.21.1"
+      # Run pytest and generate JUnit XML report
+      - run:
+          name: Run tests
+          command: |
+            pwd
+            ls
+            python -m pytest -vv tests/load_tests -x -s -v --junitxml=test-results/junit.xml --durations=5
+          no_output_timeout: 120m
+
+      # Store test results
+      - store_test_results:
+          path: test-results
+
   installing_litellm_on_python:
     docker:
@@ -289,7 +316,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest -s -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests
+            python -m pytest -s -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests
           no_output_timeout: 120m
 
       # Store test results
@@ -607,10 +634,17 @@ workflows:
               only:
                 - main
                 - /litellm_.*/
+      - load_testing:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
       - publish_to_pypi:
           requires:
             - local_testing
             - build_and_test
+            - load_testing
             - proxy_log_to_otel_tests
             - proxy_pass_through_endpoint_tests
           filters:
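
To reproduce the new load_testing job locally, the same pytest invocation can be driven from Python. This is a sketch, assuming it is run from the repository root with the pytest packages installed exactly as the job above installs them:

# Sketch: mirror the load_testing CI step from Python (assumes the repo root
# as the working directory and test-results/ as a writable output directory).
import pytest

exit_code = pytest.main(
    [
        "-vv",
        "tests/load_tests",
        "-x",
        "-s",
        "--junitxml=test-results/junit.xml",
        "--durations=5",
    ]
)
raise SystemExit(exit_code)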
tests/load_tests/test_langsmith_load_test.py (new file, 116 lines)
@@ -0,0 +1,116 @@
import sys

import os

sys.path.insert(0, os.path.abspath("../.."))

import asyncio
import litellm
from litellm._logging import verbose_logger
import logging
import time
import pytest


def test_langsmith_logging_async():
    try:
        os.environ["LANGSMITH_API_KEY"] = "lsv2_anything"
        os.environ["LANGSMITH_PROJECT"] = "pr-b"
        os.environ["LANGSMITH_BASE_URL"] = (
            "https://exampleopenaiendpoint-production.up.railway.app"
        )

        percentage_diffs = []

        for run in range(3):
            print(f"\nRun {run + 1}:")

            # Test with empty success_callback
            litellm.success_callback = []
            litellm.callbacks = []
            litellm._async_success_callback = []
            litellm._async_failure_callback = []
            litellm.failure_callback = []
            start_time_empty_callback = asyncio.run(make_async_calls())
            print("Done with no callback test")

            # Test with langsmith callback
            print("Starting langsmith test")
            litellm.success_callback = ["langsmith"]
            start_time_langsmith = asyncio.run(make_async_calls())
            print("Done with langsmith test")

            # Compare times and calculate percentage difference
            print(f"Time with success_callback='langsmith': {start_time_langsmith}")
            print(f"Time with empty success_callback: {start_time_empty_callback}")

            percentage_diff = (
                abs(start_time_langsmith - start_time_empty_callback)
                / start_time_empty_callback
                * 100
            )
            percentage_diffs.append(percentage_diff)
            print(f"Performance difference: {percentage_diff:.2f}%")

        print("percentage_diffs", percentage_diffs)

        # Calculate average percentage difference
        avg_percentage_diff = sum(percentage_diffs) / len(percentage_diffs)
        print(f"\nAverage performance difference: {avg_percentage_diff:.2f}%")

        # Assert that the average difference is not more than 10%
        assert (
            avg_percentage_diff < 10
        ), f"Average performance difference of {avg_percentage_diff:.2f}% exceeds 10% threshold"

    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")


async def make_async_calls(metadata=None, **completion_kwargs):
    total_tasks = 300
    batch_size = 100
    total_time = 0

    for batch in range(3):
        tasks = [create_async_task() for _ in range(batch_size)]

        start_time = asyncio.get_event_loop().time()
        responses = await asyncio.gather(*tasks)

        for idx, response in enumerate(responses):
            print(f"Response from Task {batch * batch_size + idx + 1}: {response}")

        await asyncio.sleep(1)

        batch_time = asyncio.get_event_loop().time() - start_time
        total_time += batch_time

    return total_time


def create_async_task(**completion_kwargs):
    """
    Creates an async task for the litellm.acompletion function.
    This is just the task, but it is not run here.
    To run the task it must be awaited or used in other asyncio coroutine execution functions like asyncio.gather.
    Any kwargs passed to this function will be passed to the litellm.acompletion function.
    By default a standard set of arguments are used for the litellm.acompletion function.
    """
    completion_args = {
        "model": "openai/chatgpt-v-2",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test"}],
        "max_tokens": 5,
        "temperature": 0.7,
        "timeout": 5,
        "user": "langfuse_latency_test_user",
        "mock_response": "hello from my load test",
    }
    completion_args.update(completion_kwargs)
    return asyncio.create_task(litellm.acompletion(**completion_args))
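
The create_async_task docstring above notes that any kwargs are forwarded to litellm.acompletion. A hypothetical override, not part of this PR, would look like:

# Hypothetical usage of the create_async_task helper defined above: override
# the default message for one call, then await the scheduled task.
async def run_single_override():
    task = create_async_task(
        messages=[{"role": "user", "content": "override just this call"}],
    )
    return await task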
tests/load_tests/test_otel_load_test.py (new file, 99 lines)
@@ -0,0 +1,99 @@
import sys

import os

sys.path.insert(0, os.path.abspath("../.."))

import asyncio
import litellm
from litellm._logging import verbose_logger
import logging
import time
import pytest


def test_otel_logging_async():
    try:
        os.environ["OTEL_EXPORTER"] = "otlp_http"
        os.environ["OTEL_ENDPOINT"] = (
            "https://exampleopenaiendpoint-production.up.railway.app/traces"
        )
        os.environ["OTEL_HEADERS"] = "Authorization=K0BSwd"

        def single_run():
            litellm.callbacks = []
            start_time_empty = asyncio.run(make_async_calls())
            print(f"Time with empty callback: {start_time_empty}")

            litellm.callbacks = ["otel"]
            start_time_otel = asyncio.run(make_async_calls())
            print(f"Time with otel callback: {start_time_otel}")

            percent_diff = (
                abs(start_time_otel - start_time_empty) / start_time_empty * 100
            )
            print(f"Run performance difference: {percent_diff:.2f}%")
            return percent_diff

        percent_diffs = [single_run() for _ in range(3)]
        avg_percent_diff = sum(percent_diffs) / len(percent_diffs)

        print(f"Percentage differences: {percent_diffs}")
        print(f"Average performance difference: {avg_percent_diff:.2f}%")

        assert (
            avg_percent_diff < 10
        ), f"Average performance difference of {avg_percent_diff:.2f}% exceeds 10% threshold"

    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")


async def make_async_calls(metadata=None, **completion_kwargs):
    total_start_time = asyncio.get_event_loop().time()
    tasks = []

    async def create_and_run_task():
        task = create_async_task(**completion_kwargs)
        response = await task
        print(f"Response: {response}")

    for _ in range(3):  # Run for 10 seconds
        # Create 100 tasks
        tasks = []
        for _ in range(100):
            tasks.append(asyncio.create_task(create_and_run_task()))

        # Wait for any remaining tasks to complete
        await asyncio.gather(*tasks)

        await asyncio.sleep(1)

    # Calculate the total time taken
    total_time = asyncio.get_event_loop().time() - total_start_time

    return total_time


def create_async_task(**completion_kwargs):
    """
    Creates an async task for the litellm.acompletion function.
    This is just the task, but it is not run here.
    To run the task it must be awaited or used in other asyncio coroutine execution functions like asyncio.gather.
    Any kwargs passed to this function will be passed to the litellm.acompletion function.
    By default a standard set of arguments are used for the litellm.acompletion function.
    """
    completion_args = {
        "model": "openai/chatgpt-v-2",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test" * 100}],
        "max_tokens": 5,
        "temperature": 0.7,
        "timeout": 5,
        "user": "langfuse_latency_test_user",
        "mock_response": "Mock response",
    }
    completion_args.update(completion_kwargs)
    return asyncio.create_task(litellm.acompletion(**completion_args))