Merge pull request #5646 from BerriAI/litellm_add_load_testing_logging

[Feat] Add Load Testing for Langsmith, and OTEL logging
2024-09-11 21:30:37 -07:00 · 2024-09-11 21:30:37 -07:00 · 9d2b09099f
commit 9d2b09099f
parent 129113143e 88706488f9
3 changed files with 250 additions and 1 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -149,6 +149,33 @@ jobs:
      # Store test results
      - store_test_results:
          path: test-results
+  load_testing:
+    # Runs only the tests under tests/load_tests, in their own job.
+    docker:
+      - image: cimg/python:3.11
+    working_directory: ~/project
+
+    steps:
+      - checkout
+      # Install the project requirements plus exact-pinned pytest, pytest-retry and pytest-asyncio
+      - run:
+          name: Install Dependencies
+          command: |
+            python -m pip install --upgrade pip
+            python -m pip install -r requirements.txt
+            pip install "pytest==7.3.1"
+            pip install "pytest-retry==1.6.3"
+            pip install "pytest-asyncio==0.21.1"
+      # Run pytest and generate JUnit XML report
+      # (-x stops at the first failure; -s disables output capture so the tests' print output is shown)
+      - run:
+          name: Run tests
+          command: |
+            pwd
+            ls
+            python -m pytest -vv tests/load_tests -x -s -v --junitxml=test-results/junit.xml --durations=5
+          no_output_timeout: 120m
+
+      # Store test results (the JUnit XML written to test-results/ by the step above)
+      - store_test_results:
+          path: test-results

  installing_litellm_on_python:
    docker:
@@ -289,7 +316,7 @@ jobs:
          command: |
            pwd
            ls
-            python -m pytest -s -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests
+            python -m pytest -s -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests
          no_output_timeout: 120m

      # Store test results
@@ -607,10 +634,17 @@ workflows:
              only:
                - main
                - /litellm_.*/
+      - load_testing:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
      - publish_to_pypi:
          requires:
            - local_testing
            - build_and_test
+            - load_testing
            - proxy_log_to_otel_tests
            - proxy_pass_through_endpoint_tests
          filters:
--- a/tests/load_tests/test_langsmith_load_test.py
+++ b/tests/load_tests/test_langsmith_load_test.py
@@ -0,0 +1,116 @@
+import sys
+
+import os
+
+sys.path.insert(0, os.path.abspath("../.."))
+
+import asyncio
+import litellm
+from litellm._logging import verbose_logger
+import logging
+import time
+import pytest
+
+
+def test_langsmith_logging_async():
+    """
+    Load test: the Langsmith success callback must add less than 10% latency.
+
+    Three times in a row, the batched mock completions are timed once with
+    every litellm callback list emptied and once with
+    ``litellm.success_callback = ["langsmith"]``. The relative difference of
+    each pair is collected and the average must stay below 10%.
+
+    The ``LANGSMITH_*`` environment variables are set for the process and are
+    not restored afterwards.
+
+    A ``litellm.Timeout`` is tolerated: the test ends without a verdict.
+    Any other exception, including a failed assertion, is reported through
+    ``pytest.fail``.
+    """
+    try:
+        os.environ["LANGSMITH_API_KEY"] = "lsv2_anything"
+        os.environ["LANGSMITH_PROJECT"] = "pr-b"
+        os.environ["LANGSMITH_BASE_URL"] = (
+            "https://exampleopenaiendpoint-production.up.railway.app"
+        )
+
+        percentage_diffs = []
+
+        for run in range(3):
+            print(f"\nRun {run + 1}:")
+
+            # Baseline: clear every callback list so nothing is logged.
+            litellm.success_callback = []
+            litellm.callbacks = []
+            litellm._async_success_callback = []
+            litellm._async_failure_callback = []
+            litellm.failure_callback = []
+            start_time_empty_callback = asyncio.run(make_async_calls())
+            print("Done with no callback test")
+
+            # Same workload with the langsmith success callback enabled.
+            print("Starting langsmith test")
+            litellm.success_callback = ["langsmith"]
+            start_time_langsmith = asyncio.run(make_async_calls())
+            print("Done with langsmith test")
+
+            # Compare times and calculate percentage difference
+            print(f"Time with success_callback='langsmith': {start_time_langsmith}")
+            print(f"Time with empty success_callback: {start_time_empty_callback}")
+
+            # abs() means a faster callback run counts as a difference too.
+            percentage_diff = (
+                abs(start_time_langsmith - start_time_empty_callback)
+                / start_time_empty_callback
+                * 100
+            )
+            percentage_diffs.append(percentage_diff)
+            print(f"Performance difference: {percentage_diff:.2f}%")
+        print("percentage_diffs", percentage_diffs)
+        # Calculate average percentage difference
+        avg_percentage_diff = sum(percentage_diffs) / len(percentage_diffs)
+        print(f"\nAverage performance difference: {avg_percentage_diff:.2f}%")
+
+        # Assert that the average difference is not more than 10%
+        assert (
+            avg_percentage_diff < 10
+        ), f"Average performance difference of {avg_percentage_diff:.2f}% exceeds 10% threshold"
+
+    except litellm.Timeout as e:
+        # Deliberately tolerated, but made visible in the (uncaptured) output
+        # instead of being swallowed silently.
+        print(f"Latency comparison not completed, request timed out - {e}")
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+
+
+async def make_async_calls(metadata=None, **completion_kwargs):
+    """
+    Run the mock completions in sequential batches and return the time spent.
+
+    Args:
+        metadata: Unused.
+        **completion_kwargs: Overrides forwarded to ``create_async_task`` for
+            every request.
+
+    Returns:
+        The sum of the per-batch durations, measured with the event loop
+        clock. Each batch's duration includes the one-second pause that
+        follows it.
+    """
+    total_tasks = 300
+    batch_size = 100
+    total_time = 0
+    loop = asyncio.get_running_loop()
+
+    # 300 requests in batches of 100 -> three batches.
+    for batch in range(total_tasks // batch_size):
+        tasks = [
+            create_async_task(**completion_kwargs) for _ in range(batch_size)
+        ]
+
+        # The clock starts after the tasks have been created.
+        start_time = loop.time()
+        responses = await asyncio.gather(*tasks)
+
+        for idx, response in enumerate(responses):
+            print(f"Response from Task {batch * batch_size + idx + 1}: {response}")
+
+        # NOTE(review): this pause is inside the timed window, so it is part
+        # of both the baseline and the callback measurement.
+        await asyncio.sleep(1)
+
+        batch_time = loop.time() - start_time
+        total_time += batch_time
+
+    return total_time
+
+
+def create_async_task(**completion_kwargs):
+    """
+    Schedule one ``litellm.acompletion`` call and return its asyncio task.
+
+    The request is built from a fixed set of default arguments (including a
+    ``mock_response``); keyword arguments passed here override those defaults.
+    Because ``asyncio.create_task`` is used, this must be called while an
+    event loop is running. Await the returned task, directly or through
+    ``asyncio.gather``, to obtain the response.
+    """
+    default_args = {
+        "model": "openai/chatgpt-v-2",
+        "api_version": "2024-02-01",
+        "messages": [{"role": "user", "content": "This is a test"}],
+        "max_tokens": 5,
+        "temperature": 0.7,
+        "timeout": 5,
+        "user": "langfuse_latency_test_user",
+        "mock_response": "hello from my load test",
+    }
+    # Caller-supplied values win over the defaults.
+    request_args = {**default_args, **completion_kwargs}
+    pending_call = litellm.acompletion(**request_args)
+    return asyncio.create_task(pending_call)
--- a/tests/load_tests/test_otel_load_test.py
+++ b/tests/load_tests/test_otel_load_test.py
@@ -0,0 +1,99 @@
+import sys
+
+import os
+
+sys.path.insert(0, os.path.abspath("../.."))
+
+import asyncio
+import litellm
+from litellm._logging import verbose_logger
+import logging
+import time
+import pytest
+
+
+def test_otel_logging_async():
+    """
+    Load test: the "otel" callback must add less than 10% latency on average.
+
+    Three times in a row, the batched mock completions are timed once with
+    ``litellm.callbacks`` empty and once with ``litellm.callbacks = ["otel"]``.
+    The average of the three relative differences must stay below 10%.
+
+    A ``litellm.Timeout`` is ignored; any other exception, including a failed
+    assertion, is reported through ``pytest.fail``.
+    """
+    try:
+        os.environ["OTEL_EXPORTER"] = "otlp_http"
+        os.environ["OTEL_ENDPOINT"] = (
+            "https://exampleopenaiendpoint-production.up.railway.app/traces"
+        )
+        os.environ["OTEL_HEADERS"] = "Authorization=K0BSwd"
+
+        percent_diffs = []
+        for _ in range(3):
+            # Baseline run without any callbacks.
+            litellm.callbacks = []
+            start_time_empty = asyncio.run(make_async_calls())
+            print(f"Time with empty callback: {start_time_empty}")
+
+            # Same workload with the otel callback enabled.
+            litellm.callbacks = ["otel"]
+            start_time_otel = asyncio.run(make_async_calls())
+            print(f"Time with otel callback: {start_time_otel}")
+
+            # Relative difference of this pair, in percent.
+            delta = abs(start_time_otel - start_time_empty)
+            percent_diff = delta / start_time_empty * 100
+            print(f"Run performance difference: {percent_diff:.2f}%")
+            percent_diffs.append(percent_diff)
+
+        avg_percent_diff = sum(percent_diffs) / len(percent_diffs)
+
+        print(f"Percentage differences: {percent_diffs}")
+        print(f"Average performance difference: {avg_percent_diff:.2f}%")
+
+        assert (
+            avg_percent_diff < 10
+        ), f"Average performance difference of {avg_percent_diff:.2f}% exceeds 10% threshold"
+
+    except litellm.Timeout:
+        pass
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+
+
+async def make_async_calls(metadata=None, **completion_kwargs):
+    """
+    Run three batches of 100 concurrent mock completions and time the whole run.
+
+    Args:
+        metadata: Unused.
+        **completion_kwargs: Overrides forwarded to ``create_async_task`` for
+            every request.
+
+    Returns:
+        Elapsed event-loop time for the complete run, including the
+        one-second pause after each batch.
+    """
+    loop = asyncio.get_running_loop()
+    total_start_time = loop.time()
+
+    async def create_and_run_task():
+        # Await a single completion and echo its response.
+        task = create_async_task(**completion_kwargs)
+        response = await task
+        print(f"Response: {response}")
+
+    for _ in range(3):  # three sequential batches
+        # Create 100 tasks
+        tasks = [asyncio.create_task(create_and_run_task()) for _ in range(100)]
+
+        # Wait for the whole batch to complete
+        await asyncio.gather(*tasks)
+
+        # Pause between batches; this second is part of the measured total.
+        await asyncio.sleep(1)
+
+    # Calculate the total time taken
+    total_time = loop.time() - total_start_time
+
+    return total_time
+
+
+def create_async_task(**completion_kwargs):
+    """
+    Schedule one ``litellm.acompletion`` call and return its asyncio task.
+
+    The request is built from a fixed set of default arguments (including a
+    ``mock_response``); keyword arguments passed here override those defaults.
+    Because ``asyncio.create_task`` is used, this must be called while an
+    event loop is running. Await the returned task, directly or through
+    ``asyncio.gather``, to obtain the response.
+    """
+    default_args = {
+        "model": "openai/chatgpt-v-2",
+        "api_version": "2024-02-01",
+        "messages": [{"role": "user", "content": "This is a test" * 100}],
+        "max_tokens": 5,
+        "temperature": 0.7,
+        "timeout": 5,
+        "user": "langfuse_latency_test_user",
+        "mock_response": "Mock response",
+    }
+    # Caller-supplied values win over the defaults.
+    request_args = {**default_args, **completion_kwargs}
+    pending_call = litellm.acompletion(**request_args)
+    return asyncio.create_task(pending_call)