Merge pull request #5638 from BerriAI/litellm_langsmith_perf
[Langsmith Perf Improvement] Use /batch for Langsmith Logging
commit f55318de47

8 changed files with 244 additions and 54 deletions
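This PR batches Langsmith log events in memory and flushes them to Langsmith's /runs/batch endpoint, instead of sending one HTTP request per run. A minimal usage sketch: litellm.success_callback, mock_response, and the "langsmith" callback name are existing litellm conventions (the test file below uses them); LANGSMITH_BATCH_SIZE and litellm.langsmith_batch_size are the knobs this PR adds; exact values are illustrative:

    import asyncio
    import os

    import litellm

    os.environ["LANGSMITH_API_KEY"] = "ls-..."   # read by LangsmithLogger in this diff
    os.environ["LANGSMITH_BATCH_SIZE"] = "100"   # or: litellm.langsmith_batch_size = 100

    litellm.success_callback = ["langsmith"]

    async def main():
        # Each successful call appends one run to the logger's in-memory queue.
        # The queue is POSTed to /runs/batch once it reaches batch_size, or by
        # the periodic flush task every flush_interval (5s default) otherwise.
        await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hi"}],
            mock_response="test",  # skip a real provider call, as the test below does
        )
        await asyncio.sleep(6)  # give the periodic flush a chance to run

    asyncio.run(main())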
.circleci/config.yml
@@ -165,11 +165,11 @@ jobs:
                 pip install pytest
                 pip install tiktoken
                 pip install aiohttp
+                pip install openai
                 pip install click
                 pip install "boto3==1.34.34"
                 pip install jinja2
                 pip install tokenizers
-                pip install openai
                 pip install jsonschema
       - run:
           name: Run tests

litellm/__init__.py
@@ -55,6 +55,7 @@ _known_custom_logger_compatible_callbacks: List = list(
 )
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 langfuse_default_tags: Optional[List[str]] = None
+langsmith_batch_size: Optional[int] = None
 _async_input_callback: List[Callable] = (
     []
 )  # internal variable - async custom callbacks are routed here.

litellm/integrations/custom_batch_logger.py (new file, 53 lines)
@@ -0,0 +1,53 @@
+"""
+Custom Logger that handles batching logic
+
+Use this if you want your logs to be stored in memory and flushed periodically
+"""
+
+import asyncio
+import time
+from typing import List, Literal, Optional
+
+from litellm._logging import verbose_logger
+from litellm.integrations.custom_logger import CustomLogger
+
+DEFAULT_BATCH_SIZE = 512
+DEFAULT_FLUSH_INTERVAL_SECONDS = 5
+
+
+class CustomBatchLogger(CustomLogger):
+
+    def __init__(self, flush_lock: Optional[asyncio.Lock] = None, **kwargs) -> None:
+        """
+        Args:
+            flush_lock (Optional[asyncio.Lock], optional): Lock to use when flushing the queue. Defaults to None. Only used for custom loggers that do batching
+        """
+        self.log_queue: List = []
+        self.flush_interval = DEFAULT_FLUSH_INTERVAL_SECONDS  # 5 seconds
+        self.batch_size = DEFAULT_BATCH_SIZE
+        self.last_flush_time = time.time()
+        self.flush_lock = flush_lock
+
+        super().__init__(**kwargs)
+        pass
+
+    async def periodic_flush(self):
+        while True:
+            await asyncio.sleep(self.flush_interval)
+            verbose_logger.debug(
+                f"CustomLogger periodic flush after {self.flush_interval} seconds"
+            )
+            await self.flush_queue()
+
+    async def flush_queue(self):
+        async with self.flush_lock:
+            if self.log_queue:
+                verbose_logger.debug(
+                    "CustomLogger: Flushing batch of %s events", self.batch_size
+                )
+                await self.async_send_batch()
+                self.log_queue.clear()
+                self.last_flush_time = time.time()
+
+    async def async_send_batch(self):
+        pass
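For orientation, a subclass is expected to append events to self.log_queue and let the base class drive flush timing. A minimal, hypothetical subclass sketch — MyBatchLogger and its print sink are invented for illustration; the wiring mirrors what LangsmithLogger does in the next file, and the object must be constructed while an event loop is running, since __init__ spawns the periodic flush task:

    import asyncio

    from litellm.integrations.custom_batch_logger import CustomBatchLogger


    class MyBatchLogger(CustomBatchLogger):
        def __init__(self, **kwargs):
            self.flush_lock = asyncio.Lock()
            asyncio.create_task(self.periodic_flush())  # needs a running loop
            super().__init__(**kwargs, flush_lock=self.flush_lock)

        async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
            # Queue instead of sending immediately; flush once the batch fills up.
            self.log_queue.append({"model": kwargs.get("model")})
            if len(self.log_queue) >= self.batch_size:
                await self.flush_queue()

        async def async_send_batch(self):
            # Replace with a real sink (HTTP POST, file, ...). flush_queue()
            # clears self.log_queue after this returns.
            print(f"sending {len(self.log_queue)} events")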
litellm/integrations/langsmith.py
@@ -3,9 +3,11 @@
 import asyncio
 import os
 import random
+import time
 import traceback
 import types
-from datetime import datetime
+import uuid
+from datetime import datetime, timezone
 from typing import Any, List, Optional, Union
 
 import dotenv  # type: ignore

@@ -15,7 +17,7 @@ from pydantic import BaseModel  # type: ignore
 
 import litellm
 from litellm._logging import verbose_logger
-from litellm.integrations.custom_logger import CustomLogger
+from litellm.integrations.custom_batch_logger import CustomBatchLogger
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     get_async_httpx_client,

@@ -54,9 +56,8 @@ def is_serializable(value):
     return not isinstance(value, non_serializable_types)
 
 
-class LangsmithLogger(CustomLogger):
-    # Class variables or attributes
-    def __init__(self):
+class LangsmithLogger(CustomBatchLogger):
+    def __init__(self, **kwargs):
         self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
         self.langsmith_project = os.getenv("LANGSMITH_PROJECT", "litellm-completion")
         self.langsmith_default_run_name = os.getenv(

@@ -68,6 +69,14 @@ class LangsmithLogger(CustomLogger):
         self.async_httpx_client = get_async_httpx_client(
             llm_provider=httpxSpecialProvider.LoggingCallback
         )
+        _batch_size = (
+            os.getenv("LANGSMITH_BATCH_SIZE", None) or litellm.langsmith_batch_size
+        )
+        if _batch_size:
+            self.batch_size = int(_batch_size)
+        asyncio.create_task(self.periodic_flush())
+        self.flush_lock = asyncio.Lock()
+        super().__init__(**kwargs, flush_lock=self.flush_lock)
 
     def _prepare_log_data(self, kwargs, response_obj, start_time, end_time):
         import datetime

@@ -170,52 +179,44 @@ class LangsmithLogger(CustomLogger):
         if dotted_order:
             data["dotted_order"] = dotted_order
 
+        if "id" not in data or data["id"] is None:
+            """
+            for /batch langsmith requires id, trace_id and dotted_order passed as params
+            """
+            run_id = uuid.uuid4()
+            data["id"] = str(run_id)
+            data["trace_id"] = str(run_id)
+            data["dotted_order"] = self.make_dot_order(run_id=run_id)
+
         verbose_logger.debug("Langsmith Logging data on langsmith: %s", data)
 
         return data
 
-    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
-        try:
-            sampling_rate = (
-                float(os.getenv("LANGSMITH_SAMPLING_RATE"))
-                if os.getenv("LANGSMITH_SAMPLING_RATE") is not None
-                and os.getenv("LANGSMITH_SAMPLING_RATE").strip().isdigit()
-                else 1.0
-            )
-            random_sample = random.random()
-            if random_sample > sampling_rate:
-                verbose_logger.info(
-                    "Skipping Langsmith logging. Sampling rate={}, random_sample={}".format(
-                        sampling_rate, random_sample
-                    )
-                )
-                return  # Skip logging
-            verbose_logger.debug(
-                "Langsmith Async Layer Logging - kwargs: %s, response_obj: %s",
-                kwargs,
-                response_obj,
-            )
-            data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
-            url = f"{self.langsmith_base_url}/runs"
-            verbose_logger.debug(f"Langsmith Logging - About to send data to {url} ...")
-
-            headers = {"x-api-key": self.langsmith_api_key}
-            response = await self.async_httpx_client.post(
-                url=url, json=data, headers=headers
+    def _send_batch(self):
+        if not self.log_queue:
+            return
+
+        url = f"{self.langsmith_base_url}/runs/batch"
+        headers = {"x-api-key": self.langsmith_api_key}
+
+        try:
+            response = requests.post(
+                url=url,
+                json=self.log_queue,
+                headers=headers,
             )
 
             if response.status_code >= 300:
                 verbose_logger.error(
-                    f"Langmsith Error: {response.status_code} - {response.text}"
+                    f"Langsmith Error: {response.status_code} - {response.text}"
                 )
             else:
                 verbose_logger.debug(
-                    "Run successfully created, response=%s", response.text
+                    f"Batch of {len(self.log_queue)} runs successfully created"
                 )
-            verbose_logger.debug(
-                f"Langsmith Layer Logging - final response object: {response_obj}. Response text from langsmith={response.text}"
-            )
-        except:
+
+            self.log_queue.clear()
+        except Exception as e:
             verbose_logger.error(f"Langsmith Layer Error - {traceback.format_exc()}")
 
     def log_success_event(self, kwargs, response_obj, start_time, end_time):

@@ -240,25 +241,95 @@ class LangsmithLogger(CustomLogger):
                 response_obj,
             )
             data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
-            url = f"{self.langsmith_base_url}/runs"
-            verbose_logger.debug(f"Langsmith Logging - About to send data to {url} ...")
-
-            response = requests.post(
-                url=url,
-                json=data,
-                headers={"x-api-key": self.langsmith_api_key},
-            )
-
-            if response.status_code >= 300:
-                verbose_logger.error(f"Error: {response.status_code} - {response.text}")
-            else:
-                verbose_logger.debug("Run successfully created")
+            self.log_queue.append(data)
             verbose_logger.debug(
-                f"Langsmith Layer Logging - final response object: {response_obj}. Response text from langsmith={response.text}"
+                f"Langsmith, event added to queue. Will flush in {self.flush_interval} seconds..."
             )
+
+            if len(self.log_queue) >= self.batch_size:
+                self._send_batch()
+
         except:
             verbose_logger.error(f"Langsmith Layer Error - {traceback.format_exc()}")
 
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        try:
+            sampling_rate = (
+                float(os.getenv("LANGSMITH_SAMPLING_RATE"))
+                if os.getenv("LANGSMITH_SAMPLING_RATE") is not None
+                and os.getenv("LANGSMITH_SAMPLING_RATE").strip().isdigit()
+                else 1.0
+            )
+            random_sample = random.random()
+            if random_sample > sampling_rate:
+                verbose_logger.info(
+                    "Skipping Langsmith logging. Sampling rate={}, random_sample={}".format(
+                        sampling_rate, random_sample
+                    )
+                )
+                return  # Skip logging
+            verbose_logger.debug(
+                "Langsmith Async Layer Logging - kwargs: %s, response_obj: %s",
+                kwargs,
+                response_obj,
+            )
+            data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
+            self.log_queue.append(data)
+            verbose_logger.debug(
+                "Langsmith logging: queue length %s, batch size %s",
+                len(self.log_queue),
+                self.batch_size,
+            )
+            if len(self.log_queue) >= self.batch_size:
+                await self.flush_queue()
+        except:
+            verbose_logger.error(f"Langsmith Layer Error - {traceback.format_exc()}")
+
+    async def async_send_batch(self):
+        """
+        sends runs to /batch endpoint
+
+        Sends runs from self.log_queue
+
+        Returns: None
+
+        Raises: Does not raise an exception, will only verbose_logger.exception()
+        """
+        import json
+
+        if not self.log_queue:
+            return
+
+        url = f"{self.langsmith_base_url}/runs/batch"
+        headers = {"x-api-key": self.langsmith_api_key}
+
+        try:
+            response = await self.async_httpx_client.post(
+                url=url,
+                json={
+                    "post": self.log_queue,
+                },
+                headers=headers,
+            )
+            response.raise_for_status()
+
+            if response.status_code >= 300:
+                verbose_logger.error(
+                    f"Langsmith Error: {response.status_code} - {response.text}"
+                )
+            else:
+                verbose_logger.debug(
+                    f"Batch of {len(self.log_queue)} runs successfully created"
+                )
+        except httpx.HTTPStatusError as e:
+            verbose_logger.exception(
+                f"Langsmith HTTP Error: {e.response.status_code} - {e.response.text}"
+            )
+        except Exception as e:
+            verbose_logger.exception(
+                f"Langsmith Layer Error - {traceback.format_exc()}"
+            )
+
     def get_run_by_id(self, run_id):
 
         url = f"{self.langsmith_base_url}/runs/{run_id}"

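For reference, async_send_batch() wraps the queued runs in a {"post": [...]} body, and _prepare_log_data() now guarantees each run carries id, trace_id, and dotted_order (see the make_dot_order hunk below). An illustrative payload — field values are invented; only the shape and required keys come from the code above:

    payload = {
        "post": [
            {
                "id": "3f1a9d2e-...",        # uuid4, generated when missing
                "trace_id": "3f1a9d2e-...",  # same uuid for a root run
                "dotted_order": "20240912T153045123456Z3f1a9d2e-...",
                # ... remaining run fields built by _prepare_log_data()
            },
        ]
    }
    # POSTed to f"{langsmith_base_url}/runs/batch" with {"x-api-key": ...} headers.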
@@ -268,3 +339,8 @@ class LangsmithLogger(CustomLogger):
         )
 
         return response.json()
+
+    def make_dot_order(self, run_id: str):
+        st = datetime.now(timezone.utc)
+        id_ = run_id
+        return st.strftime("%Y%m%dT%H%M%S%fZ") + str(id_)

litellm/proxy/proxy_config.yaml
@@ -14,3 +14,8 @@ model_list:
 
 general_settings:
   master_key: sk-1234
+
+litellm_settings:
+  success_callback: ["langsmith", "prometheus"]
+  service_callback: ["prometheus_system"]
+  callbacks: ["otel"]

litellm/tests/test_amazing_vertex_completion.py
@@ -52,6 +52,7 @@ VERTEX_MODELS_TO_NOT_TEST = [
     "gemini-1.5-pro-preview-0215",
     "gemini-pro-experimental",
     "gemini-flash-experimental",
+    "gemini-1.5-flash-exp-0827",
     "gemini-pro-flash",
 ]
 
litellm/tests/test_langsmith.py
@@ -22,6 +22,61 @@ litellm.set_verbose = True
 import time
 
 
+@pytest.mark.asyncio
+async def test_langsmith_queue_logging():
+    try:
+        # Initialize LangsmithLogger
+        test_langsmith_logger = LangsmithLogger()
+
+        litellm.callbacks = [test_langsmith_logger]
+        test_langsmith_logger.batch_size = 6
+        litellm.set_verbose = True
+
+        # Make multiple calls to ensure we don't hit the batch size
+        for _ in range(5):
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Test message"}],
+                max_tokens=10,
+                temperature=0.2,
+                mock_response="This is a mock response",
+            )
+
+        await asyncio.sleep(3)
+
+        # Check that logs are in the queue
+        assert len(test_langsmith_logger.log_queue) == 5
+
+        # Now make calls to exceed the batch size
+        for _ in range(3):
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Test message"}],
+                max_tokens=10,
+                temperature=0.2,
+                mock_response="This is a mock response",
+            )
+
+        # Wait a short time for any asynchronous operations to complete
+        await asyncio.sleep(1)
+
+        print(
+            "Length of langsmith log queue: {}".format(
+                len(test_langsmith_logger.log_queue)
+            )
+        )
+        # Check that the queue was flushed after exceeding batch size
+        assert len(test_langsmith_logger.log_queue) < 5
+
+        # Clean up
+        for cb in litellm.callbacks:
+            if isinstance(cb, LangsmithLogger):
+                await cb.async_httpx_client.client.aclose()
+
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.skip(reason="Flaky test. covered by unit tests on custom logger.")
 @pytest.mark.asyncio()
 async def test_async_langsmith_logging():

requirements.txt
@@ -9,7 +9,6 @@ gunicorn==22.0.0  # server dep
 boto3==1.34.34 # aws bedrock/sagemaker calls
 redis==5.0.0 # caching
 numpy==1.24.3 # semantic caching
-pandas==2.1.1 # for viewing clickhouse spend analytics
 prisma==0.11.0 # for db
 mangum==0.17.0 # for aws lambda functions
 pynacl==1.5.0 # for encrypting keys
|
Loading…
Add table
Add a link
Reference in a new issue