Merge pull request #5638 from BerriAI/litellm_langsmith_perf
[Langsmith Perf Improvement] Use /batch for Langsmith Logging
commit f55318de47

8 changed files with 244 additions and 54 deletions
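This PR batches Langsmith log events in memory and flushes them to Langsmith's /runs/batch endpoint, instead of sending one HTTP request per run. A minimal usage sketch: litellm.success_callback, mock_response, and the "langsmith" callback name are existing litellm conventions (the test file below uses them); LANGSMITH_BATCH_SIZE and litellm.langsmith_batch_size are the knobs this PR adds; exact values are illustrative:

    import asyncio
    import os

    import litellm

    os.environ["LANGSMITH_API_KEY"] = "ls-..."   # read by LangsmithLogger in this diff
    os.environ["LANGSMITH_BATCH_SIZE"] = "100"   # or: litellm.langsmith_batch_size = 100

    litellm.success_callback = ["langsmith"]

    async def main():
        # Each successful call appends one run to the logger's in-memory queue.
        # The queue is POSTed to /runs/batch once it reaches batch_size, or by
        # the periodic flush task every flush_interval (5s default) otherwise.
        await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hi"}],
            mock_response="test",  # skip a real provider call, as the test below does
        )
        await asyncio.sleep(6)  # give the periodic flush a chance to run

    asyncio.run(main())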
.circleci/config.yml
@@ -165,11 +165,11 @@ jobs:
                 pip install pytest
                 pip install tiktoken
                 pip install aiohttp
+                pip install openai
                 pip install click
                 pip install "boto3==1.34.34"
                 pip install jinja2
                 pip install tokenizers
-                pip install openai
                 pip install jsonschema
       - run:
           name: Run tests

litellm/__init__.py
@@ -55,6 +55,7 @@ _known_custom_logger_compatible_callbacks: List = list(
 )
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 langfuse_default_tags: Optional[List[str]] = None
+langsmith_batch_size: Optional[int] = None
 _async_input_callback: List[Callable] = (
     []
 )  # internal variable - async custom callbacks are routed here.

litellm/integrations/custom_batch_logger.py (new file, 53 lines)
@@ -0,0 +1,53 @@
+"""
+Custom Logger that handles batching logic
+
+Use this if you want your logs to be stored in memory and flushed periodically
+"""
+
+import asyncio
+import time
+from typing import List, Literal, Optional
+
+from litellm._logging import verbose_logger
+from litellm.integrations.custom_logger import CustomLogger
+
+DEFAULT_BATCH_SIZE = 512
+DEFAULT_FLUSH_INTERVAL_SECONDS = 5
+
+
+class CustomBatchLogger(CustomLogger):
+
+    def __init__(self, flush_lock: Optional[asyncio.Lock] = None, **kwargs) -> None:
+        """
+        Args:
+            flush_lock (Optional[asyncio.Lock], optional): Lock to use when flushing the queue. Defaults to None. Only used for custom loggers that do batching
+        """
+        self.log_queue: List = []
+        self.flush_interval = DEFAULT_FLUSH_INTERVAL_SECONDS  # 5 seconds
+        self.batch_size = DEFAULT_BATCH_SIZE
+        self.last_flush_time = time.time()
+        self.flush_lock = flush_lock
+
+        super().__init__(**kwargs)
+        pass
+
+    async def periodic_flush(self):
+        while True:
+            await asyncio.sleep(self.flush_interval)
+            verbose_logger.debug(
+                f"CustomLogger periodic flush after {self.flush_interval} seconds"
+            )
+            await self.flush_queue()
+
+    async def flush_queue(self):
+        async with self.flush_lock:
+            if self.log_queue:
+                verbose_logger.debug(
+                    "CustomLogger: Flushing batch of %s events", self.batch_size
+                )
+                await self.async_send_batch()
+                self.log_queue.clear()
+                self.last_flush_time = time.time()
+
+    async def async_send_batch(self):
+        pass
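For orientation, a subclass is expected to append events to self.log_queue and let the base class drive flush timing. A minimal, hypothetical subclass sketch — MyBatchLogger and its print sink are invented for illustration; the wiring mirrors what LangsmithLogger does in the next file, and the object must be constructed while an event loop is running, since __init__ spawns the periodic flush task:

    import asyncio

    from litellm.integrations.custom_batch_logger import CustomBatchLogger


    class MyBatchLogger(CustomBatchLogger):
        def __init__(self, **kwargs):
            self.flush_lock = asyncio.Lock()
            asyncio.create_task(self.periodic_flush())  # needs a running loop
            super().__init__(**kwargs, flush_lock=self.flush_lock)

        async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
            # Queue instead of sending immediately; flush once the batch fills up.
            self.log_queue.append({"model": kwargs.get("model")})
            if len(self.log_queue) >= self.batch_size:
                await self.flush_queue()

        async def async_send_batch(self):
            # Replace with a real sink (HTTP POST, file, ...). flush_queue()
            # clears self.log_queue after this returns.
            print(f"sending {len(self.log_queue)} events")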
litellm/integrations/langsmith.py
@@ -3,9 +3,11 @@
 import asyncio
 import os
 import random
+import time
 import traceback
 import types
-from datetime import datetime
+import uuid
+from datetime import datetime, timezone
 from typing import Any, List, Optional, Union
 
 import dotenv  # type: ignore

@@ -15,7 +17,7 @@ from pydantic import BaseModel  # type: ignore
 
 import litellm
 from litellm._logging import verbose_logger
-from litellm.integrations.custom_logger import CustomLogger
+from litellm.integrations.custom_batch_logger import CustomBatchLogger
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     get_async_httpx_client,

@@ -54,9 +56,8 @@ def is_serializable(value):
     return not isinstance(value, non_serializable_types)
 
 
-class LangsmithLogger(CustomLogger):
-    # Class variables or attributes
-    def __init__(self):
+class LangsmithLogger(CustomBatchLogger):
+    def __init__(self, **kwargs):
         self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
         self.langsmith_project = os.getenv("LANGSMITH_PROJECT", "litellm-completion")
         self.langsmith_default_run_name = os.getenv(

@@ -68,6 +69,14 @@ class LangsmithLogger(CustomLogger):
         self.async_httpx_client = get_async_httpx_client(
             llm_provider=httpxSpecialProvider.LoggingCallback
         )
+        _batch_size = (
+            os.getenv("LANGSMITH_BATCH_SIZE", None) or litellm.langsmith_batch_size
+        )
+        if _batch_size:
+            self.batch_size = int(_batch_size)
+        asyncio.create_task(self.periodic_flush())
+        self.flush_lock = asyncio.Lock()
+        super().__init__(**kwargs, flush_lock=self.flush_lock)
 
     def _prepare_log_data(self, kwargs, response_obj, start_time, end_time):
         import datetime

@@ -170,52 +179,44 @@ class LangsmithLogger(CustomLogger):
         if dotted_order:
             data["dotted_order"] = dotted_order
 
+        if "id" not in data or data["id"] is None:
+            """
+            for /batch langsmith requires id, trace_id and dotted_order passed as params
+            """
+            run_id = uuid.uuid4()
+            data["id"] = str(run_id)
+            data["trace_id"] = str(run_id)
+            data["dotted_order"] = self.make_dot_order(run_id=run_id)
+
         verbose_logger.debug("Langsmith Logging data on langsmith: %s", data)
 
         return data
 
-    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
-        try:
-            sampling_rate = (
-                float(os.getenv("LANGSMITH_SAMPLING_RATE"))
-                if os.getenv("LANGSMITH_SAMPLING_RATE") is not None
-                and os.getenv("LANGSMITH_SAMPLING_RATE").strip().isdigit()
-                else 1.0
-            )
-            random_sample = random.random()
-            if random_sample > sampling_rate:
-                verbose_logger.info(
-                    "Skipping Langsmith logging. Sampling rate={}, random_sample={}".format(
-                        sampling_rate, random_sample
-                    )
-                )
-                return  # Skip logging
-            verbose_logger.debug(
-                "Langsmith Async Layer Logging - kwargs: %s, response_obj: %s",
-                kwargs,
-                response_obj,
-            )
-            data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
-            url = f"{self.langsmith_base_url}/runs"
-            verbose_logger.debug(f"Langsmith Logging - About to send data to {url} ...")
-
-            headers = {"x-api-key": self.langsmith_api_key}
-            response = await self.async_httpx_client.post(
-                url=url, json=data, headers=headers
+    def _send_batch(self):
+        if not self.log_queue:
+            return
+
+        url = f"{self.langsmith_base_url}/runs/batch"
+        headers = {"x-api-key": self.langsmith_api_key}
+
+        try:
+            response = requests.post(
+                url=url,
+                json=self.log_queue,
+                headers=headers,
             )
 
             if response.status_code >= 300:
                 verbose_logger.error(
-                    f"Langmsith Error: {response.status_code} - {response.text}"
+                    f"Langsmith Error: {response.status_code} - {response.text}"
                 )
             else:
                 verbose_logger.debug(
-                    "Run successfully created, response=%s", response.text
+                    f"Batch of {len(self.log_queue)} runs successfully created"
                 )
-            verbose_logger.debug(
-                f"Langsmith Layer Logging - final response object: {response_obj}. Response text from langsmith={response.text}"
-            )
-        except:
+
+            self.log_queue.clear()
+        except Exception as e:
             verbose_logger.error(f"Langsmith Layer Error - {traceback.format_exc()}")
 
     def log_success_event(self, kwargs, response_obj, start_time, end_time):

@@ -240,25 +241,95 @@ class LangsmithLogger(CustomLogger):
                 response_obj,
             )
             data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
-            url = f"{self.langsmith_base_url}/runs"
-            verbose_logger.debug(f"Langsmith Logging - About to send data to {url} ...")
-
-            response = requests.post(
-                url=url,
-                json=data,
-                headers={"x-api-key": self.langsmith_api_key},
-            )
-
-            if response.status_code >= 300:
-                verbose_logger.error(f"Error: {response.status_code} - {response.text}")
-            else:
-                verbose_logger.debug("Run successfully created")
+            self.log_queue.append(data)
             verbose_logger.debug(
-                f"Langsmith Layer Logging - final response object: {response_obj}. Response text from langsmith={response.text}"
+                f"Langsmith, event added to queue. Will flush in {self.flush_interval} seconds..."
             )
+
+            if len(self.log_queue) >= self.batch_size:
+                self._send_batch()
+
         except:
             verbose_logger.error(f"Langsmith Layer Error - {traceback.format_exc()}")
 
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        try:
+            sampling_rate = (
+                float(os.getenv("LANGSMITH_SAMPLING_RATE"))
+                if os.getenv("LANGSMITH_SAMPLING_RATE") is not None
+                and os.getenv("LANGSMITH_SAMPLING_RATE").strip().isdigit()
+                else 1.0
+            )
+            random_sample = random.random()
+            if random_sample > sampling_rate:
+                verbose_logger.info(
+                    "Skipping Langsmith logging. Sampling rate={}, random_sample={}".format(
+                        sampling_rate, random_sample
+                    )
+                )
+                return  # Skip logging
+            verbose_logger.debug(
+                "Langsmith Async Layer Logging - kwargs: %s, response_obj: %s",
+                kwargs,
+                response_obj,
+            )
+            data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
+            self.log_queue.append(data)
+            verbose_logger.debug(
+                "Langsmith logging: queue length %s, batch size %s",
+                len(self.log_queue),
+                self.batch_size,
+            )
+            if len(self.log_queue) >= self.batch_size:
+                await self.flush_queue()
+        except:
+            verbose_logger.error(f"Langsmith Layer Error - {traceback.format_exc()}")
+
+    async def async_send_batch(self):
+        """
+        sends runs to /batch endpoint
+
+        Sends runs from self.log_queue
+
+        Returns: None
+
+        Raises: Does not raise an exception, will only verbose_logger.exception()
+        """
+        import json
+
+        if not self.log_queue:
+            return
+
+        url = f"{self.langsmith_base_url}/runs/batch"
+        headers = {"x-api-key": self.langsmith_api_key}
+
+        try:
+            response = await self.async_httpx_client.post(
+                url=url,
+                json={
+                    "post": self.log_queue,
+                },
+                headers=headers,
+            )
+            response.raise_for_status()
+
+            if response.status_code >= 300:
+                verbose_logger.error(
+                    f"Langsmith Error: {response.status_code} - {response.text}"
+                )
+            else:
+                verbose_logger.debug(
+                    f"Batch of {len(self.log_queue)} runs successfully created"
+                )
+        except httpx.HTTPStatusError as e:
+            verbose_logger.exception(
+                f"Langsmith HTTP Error: {e.response.status_code} - {e.response.text}"
+            )
+        except Exception as e:
+            verbose_logger.exception(
+                f"Langsmith Layer Error - {traceback.format_exc()}"
+            )
+
     def get_run_by_id(self, run_id):
 
         url = f"{self.langsmith_base_url}/runs/{run_id}"

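For reference, async_send_batch() wraps the queued runs in a {"post": [...]} body, and _prepare_log_data() now guarantees each run carries id, trace_id, and dotted_order (see the make_dot_order hunk below). An illustrative payload — field values are invented; only the shape and required keys come from the code above:

    payload = {
        "post": [
            {
                "id": "3f1a9d2e-...",        # uuid4, generated when missing
                "trace_id": "3f1a9d2e-...",  # same uuid for a root run
                "dotted_order": "20240912T153045123456Z3f1a9d2e-...",
                # ... remaining run fields built by _prepare_log_data()
            },
        ]
    }
    # POSTed to f"{langsmith_base_url}/runs/batch" with {"x-api-key": ...} headers.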
@@ -268,3 +339,8 @@ class LangsmithLogger(CustomLogger):
         )
 
         return response.json()
+
+    def make_dot_order(self, run_id: str):
+        st = datetime.now(timezone.utc)
+        id_ = run_id
+        return st.strftime("%Y%m%dT%H%M%S%fZ") + str(id_)

litellm/proxy/proxy_config.yaml
@@ -14,3 +14,8 @@ model_list:
 
 general_settings:
   master_key: sk-1234
+
+litellm_settings:
+  success_callback: ["langsmith", "prometheus"]
+  service_callback: ["prometheus_system"]
+  callbacks: ["otel"]

litellm/tests/test_amazing_vertex_completion.py
@@ -52,6 +52,7 @@ VERTEX_MODELS_TO_NOT_TEST = [
     "gemini-1.5-pro-preview-0215",
     "gemini-pro-experimental",
     "gemini-flash-experimental",
+    "gemini-1.5-flash-exp-0827",
     "gemini-pro-flash",
 ]
 
litellm/tests/test_langsmith.py
@@ -22,6 +22,61 @@ litellm.set_verbose = True
 import time
 
 
+@pytest.mark.asyncio
+async def test_langsmith_queue_logging():
+    try:
+        # Initialize LangsmithLogger
+        test_langsmith_logger = LangsmithLogger()
+
+        litellm.callbacks = [test_langsmith_logger]
+        test_langsmith_logger.batch_size = 6
+        litellm.set_verbose = True
+
+        # Make multiple calls to ensure we don't hit the batch size
+        for _ in range(5):
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Test message"}],
+                max_tokens=10,
+                temperature=0.2,
+                mock_response="This is a mock response",
+            )
+
+        await asyncio.sleep(3)
+
+        # Check that logs are in the queue
+        assert len(test_langsmith_logger.log_queue) == 5
+
+        # Now make calls to exceed the batch size
+        for _ in range(3):
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Test message"}],
+                max_tokens=10,
+                temperature=0.2,
+                mock_response="This is a mock response",
+            )
+
+        # Wait a short time for any asynchronous operations to complete
+        await asyncio.sleep(1)
+
+        print(
+            "Length of langsmith log queue: {}".format(
+                len(test_langsmith_logger.log_queue)
+            )
+        )
+        # Check that the queue was flushed after exceeding batch size
+        assert len(test_langsmith_logger.log_queue) < 5
+
+        # Clean up
+        for cb in litellm.callbacks:
+            if isinstance(cb, LangsmithLogger):
+                await cb.async_httpx_client.client.aclose()
+
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.skip(reason="Flaky test. covered by unit tests on custom logger.")
 @pytest.mark.asyncio()
 async def test_async_langsmith_logging():

requirements.txt
@@ -9,7 +9,6 @@ gunicorn==22.0.0  # server dep
 boto3==1.34.34 # aws bedrock/sagemaker calls
 redis==5.0.0 # caching
 numpy==1.24.3 # semantic caching
-pandas==2.1.1 # for viewing clickhouse spend analytics
 prisma==0.11.0 # for db
 mangum==0.17.0 # for aws lambda functions
 pynacl==1.5.0 # for encrypting keys
|
Loading…
Add table
Add a link
Reference in a new issue