diff --git a/docs/my-website/docs/observability/langsmith_integration.md b/docs/my-website/docs/observability/langsmith_integration.md
index c038abd821..79d047e33a 100644
--- a/docs/my-website/docs/observability/langsmith_integration.md
+++ b/docs/my-website/docs/observability/langsmith_integration.md
@@ -14,7 +14,7 @@ https://github.com/BerriAI/litellm
 An all-in-one developer platform for every step of the application lifecycle
 https://smith.langchain.com/
 
- 
+
 :::info
 We want to learn how we can make the callbacks better! Meet the LiteLLM [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md
index 27f1789e0b..0d50166454 100644
--- a/docs/my-website/docs/proxy/logging.md
+++ b/docs/my-website/docs/proxy/logging.md
@@ -5,6 +5,7 @@ Log Proxy input, output, and exceptions using:
 - Langfuse
 - OpenTelemetry
 - Custom Callbacks
+- Langsmith
 - DataDog
 - DynamoDB
 - s3 Bucket
@@ -1086,6 +1087,50 @@ litellm_settings:
 
 Start the LiteLLM Proxy and make a test request to verify the logs reached your callback API
 
+## Logging LLM IO to Langsmith
+
+1. Set `success_callback: ["langsmith"]` in your litellm config.yaml
+
+If you're using a custom LangSmith instance, you can set the
+`LANGSMITH_BASE_URL` environment variable to point to your instance.
+
+```yaml
+litellm_settings:
+  success_callback: ["langsmith"]
+
+environment_variables:
+  LANGSMITH_API_KEY: "lsv2_pt_xxxxxxxx"
+  LANGSMITH_PROJECT: "litellm-proxy"
+
+  LANGSMITH_BASE_URL: "https://api.smith.langchain.com" # (Optional - only needed if you have a custom LangSmith instance)
+```
+
+2. Start Proxy
+
+```shell
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!
+
+```bash
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--data ' {
+    "model": "fake-openai-endpoint",
+    "messages": [
+        {
+            "role": "user",
+            "content": "Hello, Claude gm!"
+        }
+    ]
+    }
+'
+```
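+
+You can send the same request through the OpenAI Python SDK pointed at the proxy (a minimal sketch; it assumes the proxy is running on `http://0.0.0.0:4000` and that `sk-1234` is a valid virtual key for your proxy):
+
+```python
+import openai
+
+# Point the OpenAI client at the LiteLLM Proxy
+client = openai.OpenAI(
+    api_key="sk-1234",  # placeholder proxy virtual key
+    base_url="http://0.0.0.0:4000",
+)
+
+response = client.chat.completions.create(
+    model="fake-openai-endpoint",
+    messages=[{"role": "user", "content": "Hello, Claude gm!"}],
+)
+print(response)
+```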
+
+Expect to see your log on LangSmith
+
+
 ## Logging LLM IO to Galileo
 
 [BETA]
diff --git a/docs/my-website/img/langsmith_new.png b/docs/my-website/img/langsmith_new.png
new file mode 100644
index 0000000000..d5586bdbe5
Binary files /dev/null and b/docs/my-website/img/langsmith_new.png differ
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 645a0bccdf..7dcc934a68 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -38,7 +38,7 @@ success_callback: List[Union[str, Callable]] = []
 failure_callback: List[Union[str, Callable]] = []
 service_callback: List[Union[str, Callable]] = []
 _custom_logger_compatible_callbacks_literal = Literal[
-    "lago", "openmeter", "logfire", "dynamic_rate_limiter"
+    "lago", "openmeter", "logfire", "dynamic_rate_limiter", "langsmith", "galileo"
 ]
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 _langfuse_default_tags: Optional[
diff --git a/litellm/integrations/langsmith.py b/litellm/integrations/langsmith.py
index d8b01c3446..afe8be28f5 100644
--- a/litellm/integrations/langsmith.py
+++ b/litellm/integrations/langsmith.py
@@ -5,12 +5,17 @@ import os
 import traceback
 import types
 from datetime import datetime
-from typing import Any, List, Optional
+from typing import Any, List, Optional, Union
 
 import dotenv  # type: ignore
 import requests  # type: ignore
 from pydantic import BaseModel  # type: ignore
 
+import litellm
+from litellm._logging import verbose_logger
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+
 
 class LangsmithInputs(BaseModel):
     model: Optional[str] = None
@@ -24,7 +29,7 @@ class LangsmithInputs(BaseModel):
     custom_llm_provider: Optional[str] = None
     input: Optional[List[Any]] = None
     log_event_type: Optional[str] = None
-    original_response: Optional[str] = None
+    original_response: Optional[Any] = None
     response_cost: Optional[float] = None
 
     # LiteLLM Virtual Key specific fields
@@ -43,7 +48,7 @@ def is_serializable(value):
     return not isinstance(value, non_serializable_types)
 
 
-class LangsmithLogger:
+class LangsmithLogger(CustomLogger):
     # Class variables or attributes
     def __init__(self):
         self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
@@ -54,84 +59,116 @@ class LangsmithLogger:
         self.langsmith_base_url = os.getenv(
             "LANGSMITH_BASE_URL", "https://api.smith.langchain.com"
         )
+        self.async_httpx_client = AsyncHTTPHandler()
 
-    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
-        # Method definition
-        # inspired by Langsmith http api here: https://github.com/langchain-ai/langsmith-cookbook/blob/main/tracing-examples/rest/rest.ipynb
-        metadata = (
-            kwargs.get("litellm_params", {}).get("metadata", {}) or {}
-        )  # if metadata is None
+    def _prepare_log_data(self, kwargs, response_obj, start_time, end_time):
+        import datetime
+        from datetime import timezone
+
+        metadata = kwargs.get("litellm_params", {}).get("metadata", {}) or {}
 
-        # set user_api_key, user_team_id, user_api_key_user_id
         kwargs["user_api_key"] = metadata.get("user_api_key", None)
         kwargs["user_api_key_user_id"] = metadata.get("user_api_key_user_id", None)
         kwargs["user_api_key_team_alias"] = metadata.get(
             "user_api_key_team_alias", None
        )
 
-        # set project name and run_name for langsmith logging
-        # users can pass project_name and run name to litellm.completion()
-        # Example: litellm.completion(model, messages, metadata={"project_name": "my-litellm-project", "run_name": "my-langsmith-run"})
-        # if not set litellm will fallback to the environment variable LANGSMITH_PROJECT, then to the default project_name = litellm-completion, run_name = LLMRun
         project_name = metadata.get("project_name", self.langsmith_project)
         run_name = metadata.get("run_name", self.langsmith_default_run_name)
         run_id = metadata.get("id", None)
-        print_verbose(
+        verbose_logger.debug(
             f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
         )
-        langsmith_base_url = os.getenv(
-            "LANGSMITH_BASE_URL", "https://api.smith.langchain.com"
-        )
 
         try:
-            print_verbose(
-                f"Langsmith Logging - Enters logging function for model {kwargs}"
-            )
-            import datetime
-            from datetime import timezone
+            start_time = kwargs["start_time"].astimezone(timezone.utc).isoformat()
+            end_time = kwargs["end_time"].astimezone(timezone.utc).isoformat()
+        except:
+            start_time = datetime.datetime.utcnow().isoformat()
+            end_time = datetime.datetime.utcnow().isoformat()
 
-            import requests
+        # filter out kwargs to not include any dicts, langsmith throws an error when trying to log kwargs
+        logged_kwargs = LangsmithInputs(**kwargs)
+        kwargs = logged_kwargs.model_dump()
 
+        new_kwargs = {}
+        for key in kwargs:
+            value = kwargs[key]
+            if key == "start_time" or key == "end_time" or value is None:
+                pass
+            elif key == "original_response" and not isinstance(value, str):
+                new_kwargs[key] = str(value)
+            elif type(value) == datetime.datetime:
+                new_kwargs[key] = value.isoformat()
+            elif type(value) != dict and is_serializable(value=value):
+                new_kwargs[key] = value
+            elif not is_serializable(value=value):
+                continue
+
+        if isinstance(response_obj, BaseModel):
             try:
-                start_time = kwargs["start_time"].astimezone(timezone.utc).isoformat()
-                end_time = kwargs["end_time"].astimezone(timezone.utc).isoformat()
+                response_obj = response_obj.model_dump()
             except:
-                start_time = datetime.datetime.utcnow().isoformat()
-                end_time = datetime.datetime.utcnow().isoformat()
+                response_obj = response_obj.dict()  # type: ignore
 
-            # filter out kwargs to not include any dicts, langsmith throws an erros when trying to log kwargs
-            logged_kwargs = LangsmithInputs(**kwargs)
-            kwargs = logged_kwargs.model_dump()
+        data = {
+            "name": run_name,
+            "run_type": "llm",  # this should always be llm, since litellm always logs llm calls. Langsmith allows us to log "chain"
+            "inputs": new_kwargs,
+            "outputs": response_obj,
+            "session_name": project_name,
+            "start_time": start_time,
+            "end_time": end_time,
+        }
 
-            new_kwargs = {}
-            for key in kwargs:
-                value = kwargs[key]
-                if key == "start_time" or key == "end_time" or value is None:
-                    pass
-                elif type(value) == datetime.datetime:
-                    new_kwargs[key] = value.isoformat()
-                elif type(value) != dict and is_serializable(value=value):
-                    new_kwargs[key] = value
+        if run_id:
+            data["id"] = run_id
 
-            if isinstance(response_obj, BaseModel):
-                try:
-                    response_obj = response_obj.model_dump()
-                except:
-                    response_obj = response_obj.dict()  # type: ignore
+        verbose_logger.debug("Langsmith Logging data on langsmith: %s", data)
 
-            data = {
-                "name": run_name,
-                "run_type": "llm",  # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
-                "inputs": new_kwargs,
-                "outputs": response_obj,
-                "session_name": project_name,
-                "start_time": start_time,
-                "end_time": end_time,
-                "id": run_id,
-            }
+        return data
+
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        try:
+            verbose_logger.debug(
+                "Langsmith Async Layer Logging - kwargs: %s, response_obj: %s",
+                kwargs,
+                response_obj,
+            )
+            data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
+            url = f"{self.langsmith_base_url}/runs"
+            verbose_logger.debug(f"Langsmith Logging - About to send data to {url} ...")
+
+            headers = {"x-api-key": self.langsmith_api_key}
+            response = await self.async_httpx_client.post(
+                url=url, json=data, headers=headers
+            )
+
+            if response.status_code >= 300:
+                verbose_logger.error(
+                    f"Langsmith Error: {response.status_code} - {response.text}"
+                )
+            else:
+                verbose_logger.debug(
+                    "Run successfully created, response=%s", response.text
+                )
+            verbose_logger.debug(
+                f"Langsmith Layer Logging - final response object: {response_obj}. Response text from langsmith={response.text}"
+            )
+        except:
+            verbose_logger.error(f"Langsmith Layer Error - {traceback.format_exc()}")
+
+    def log_success_event(self, kwargs, response_obj, start_time, end_time):
+        try:
+            verbose_logger.debug(
+                "Langsmith Sync Layer Logging - kwargs: %s, response_obj: %s",
+                kwargs,
+                response_obj,
+            )
+            data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
+            url = f"{self.langsmith_base_url}/runs"
+            verbose_logger.debug(f"Langsmith Logging - About to send data to {url} ...")
 
-            url = f"{langsmith_base_url}/runs"
-            print_verbose(f"Langsmith Logging - About to send data to {url} ...")
             response = requests.post(
                 url=url,
                 json=data,
@@ -139,16 +176,14 @@ class LangsmithLogger:
             )
 
             if response.status_code >= 300:
-                print_verbose(f"Error: {response.status_code}")
+                verbose_logger.error(f"Error: {response.status_code} - {response.text}")
             else:
-                print_verbose("Run successfully created")
-                print_verbose(
+                verbose_logger.debug("Run successfully created")
+                verbose_logger.debug(
                     f"Langsmith Layer Logging - final response object: {response_obj}. Response text from langsmith={response.text}"
                 )
-            return
         except:
-            print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
-            pass
+            verbose_logger.error(f"Langsmith Layer Error - {traceback.format_exc()}")
 
     def get_run_by_id(self, run_id):
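The payload assembled by `_prepare_log_data` maps directly onto LangSmith's `POST /runs` REST endpoint. A minimal sketch of the equivalent request (the endpoint and `x-api-key` header come from the logger above; all field values below are illustrative placeholders):

```python
import requests

# Same endpoint and auth header the logger uses
url = "https://api.smith.langchain.com/runs"
headers = {"x-api-key": "lsv2_pt_xxxxxxxx"}  # placeholder LANGSMITH_API_KEY

data = {
    "name": "LLMRun",  # run_name; LLMRun is the logger's default
    "run_type": "llm",  # litellm always logs llm calls
    "inputs": {"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
    "outputs": {"choices": [{"message": {"role": "assistant", "content": "Hello!"}}]},
    "session_name": "litellm-completion",  # project_name; litellm-completion is the default
    "start_time": "2024-07-11T00:00:00+00:00",
    "end_time": "2024-07-11T00:00:01+00:00",
}

response = requests.post(url=url, json=data, headers=headers)
print(response.status_code, response.text)
```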
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 9c8d02df98..a92e98e8b1 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -39,7 +39,6 @@ from litellm.utils import (
     add_breadcrumb,
     capture_exception,
     customLogger,
-    langsmithLogger,
     liteDebuggerClient,
     logfireLogger,
     lunaryLogger,
@@ -89,7 +88,6 @@ alerts_channel = None
 heliconeLogger = None
 athinaLogger = None
 promptLayerLogger = None
-langsmithLogger = None
 logfireLogger = None
 weightsBiasesLogger = None
 customLogger = None
@@ -136,7 +134,7 @@ in_memory_trace_id_cache = ServiceTraceIDCache()
 
 
 class Logging:
-    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
+    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
     custom_pricing: bool = False
     stream_options = None
@@ -738,23 +736,6 @@ class Logging:
                                 end_time=end_time,
                                 print_verbose=print_verbose,
                             )
-                        if callback == "langsmith":
-                            print_verbose("reaches langsmith for logging!")
-                            if self.stream:
-                                if "complete_streaming_response" not in kwargs:
-                                    continue
-                                else:
-                                    print_verbose(
-                                        "reaches langsmith for streaming logging!"
-                                    )
-                                    result = kwargs["complete_streaming_response"]
-                            langsmithLogger.log_event(
-                                kwargs=self.model_call_details,
-                                response_obj=result,
-                                start_time=start_time,
-                                end_time=end_time,
-                                print_verbose=print_verbose,
-                            )
                         if callback == "logfire":
                             global logfireLogger
                             verbose_logger.debug("reaches logfire for success logging!")
@@ -1829,7 +1810,7 @@ def set_callbacks(callback_list, function_id=None):
     """
     Globally sets the callback client
     """
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
 
     try:
         for callback in callback_list:
@@ -1910,8 +1891,6 @@ def set_callbacks(callback_list, function_id=None):
                 s3Logger = S3Logger()
             elif callback == "wandb":
                 weightsBiasesLogger = WeightsBiasesLogger()
-            elif callback == "langsmith":
-                langsmithLogger = LangsmithLogger()
             elif callback == "logfire":
                 logfireLogger = LogfireLogger()
             elif callback == "aispend":
@@ -1964,6 +1943,15 @@ def _init_custom_logger_compatible_class(
             _in_memory_loggers.append(_openmeter_logger)
             return _openmeter_logger  # type: ignore
 
+    elif logging_integration == "langsmith":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, LangsmithLogger):
+                return callback  # type: ignore
+
+        _langsmith_logger = LangsmithLogger()
+        _in_memory_loggers.append(_langsmith_logger)
+        return _langsmith_logger  # type: ignore
+
     elif logging_integration == "galileo":
         for callback in _in_memory_loggers:
             if isinstance(callback, GalileoObserve):
                 return callback
@@ -2032,6 +2020,10 @@ def get_custom_logger_compatible_class(
         for callback in _in_memory_loggers:
             if isinstance(callback, GalileoObserve):
                 return callback
+    elif logging_integration == "langsmith":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, LangsmithLogger):
+                return callback
     elif logging_integration == "logfire":
         if "LOGFIRE_TOKEN" not in os.environ:
             raise ValueError("LOGFIRE_TOKEN not found in environment variables")
diff --git a/litellm/tests/test_langsmith.py b/litellm/tests/test_langsmith.py
index 8af0c9cbb5..f69c964a13 100644
--- a/litellm/tests/test_langsmith.py
+++ b/litellm/tests/test_langsmith.py
@@ -4,24 +4,33 @@ import sys
 
 sys.path.insert(0, os.path.abspath("../.."))
 
+import asyncio
+import logging
+import uuid
+
+import pytest
+
 import litellm
 from litellm import completion
+from litellm._logging import verbose_logger
 from litellm.integrations.langsmith import LangsmithLogger
 
+verbose_logger.setLevel(logging.DEBUG)
+
 litellm.set_verbose = True
 import time
 
 test_langsmith_logger = LangsmithLogger()
 
 
-def test_langsmith_logging():
+@pytest.mark.asyncio()
+async def test_langsmith_logging():
     try:
-        import uuid
 
         run_id = str(uuid.uuid4())
         litellm.set_verbose = True
-        litellm.success_callback = ["langsmith"]
-        response = completion(
+        litellm.callbacks = ["langsmith"]
+        response = await litellm.acompletion(
             model="claude-instant-1.2",
             messages=[{"role": "user", "content": "what llm are u"}],
             max_tokens=10,
@@ -40,7 +49,7 @@ def test_langsmith_logging():
             },
         )
         print(response)
-        time.sleep(3)
+        await asyncio.sleep(3)
 
         print("run_id", run_id)
         logged_run_on_langsmith = test_langsmith_logger.get_run_by_id(run_id=run_id)
@@ -50,13 +59,15 @@ def test_langsmith_logging():
         print("fields in logged_run_on_langsmith", logged_run_on_langsmith.keys())
 
         input_fields_on_langsmith = logged_run_on_langsmith.get("inputs")
-        extra_fields_on_langsmith = logged_run_on_langsmith.get("extra")
+        extra_fields_on_langsmith = logged_run_on_langsmith.get("extra").get(
+            "invocation_params"
+        )
 
         print("\nLogged INPUT ON LANGSMITH", input_fields_on_langsmith)
 
         print("\nextra fields on langsmith", extra_fields_on_langsmith)
 
-        assert input_fields_on_langsmith is not None
+        assert isinstance(input_fields_on_langsmith, dict)
 
         assert "api_key" not in input_fields_on_langsmith
         assert "api_key" not in extra_fields_on_langsmith
@@ -67,6 +78,7 @@ def test_langsmith_logging():
 
     except Exception as e:
         print(e)
+        pytest.fail(f"Error occurred: {e}")
 
 
 # test_langsmith_logging()
@@ -75,6 +87,7 @@ def test_langsmith_logging():
 def test_langsmith_logging_with_metadata():
     try:
         litellm.success_callback = ["langsmith"]
+        litellm.set_verbose = True
         response = completion(
             model="gpt-3.5-turbo",
             messages=[{"role": "user", "content": "what llm are u"}],
@@ -83,28 +96,66 @@ def test_langsmith_logging_with_metadata():
         )
         print(response)
         time.sleep(3)
+    except Exception as e:
         print(e)
+        pytest.fail(f"Error occurred: {e}")
 
 
-# test_langsmith_logging_with_metadata()
-
-
-def test_langsmith_logging_with_streaming_and_metadata():
+@pytest.mark.parametrize("sync_mode", [False, True])
+@pytest.mark.asyncio
+async def test_langsmith_logging_with_streaming_and_metadata(sync_mode):
     try:
         litellm.success_callback = ["langsmith"]
-        response = completion(
-            model="gpt-3.5-turbo",
-            messages=[{"role": "user", "content": "what llm are u"}],
-            max_tokens=10,
-            temperature=0.2,
-            stream=True,
+        litellm.set_verbose = True
+        run_id = str(uuid.uuid4())
+
+        messages = [{"role": "user", "content": "what llm are u"}]
+        if sync_mode is True:
+            response = completion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                max_tokens=10,
+                temperature=0.2,
+                stream=True,
+                metadata={"id": run_id},
+            )
+            for chunk in response:
+                continue
+            time.sleep(3)
+        else:
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                max_tokens=10,
+                temperature=0.2,
+                mock_response="This is a mock request",
+                stream=True,
+                metadata={"id": run_id},
+            )
+            async for chunk in response:
+                continue
+            await asyncio.sleep(3)
+
+        print("run_id", run_id)
+        logged_run_on_langsmith = test_langsmith_logger.get_run_by_id(run_id=run_id)
+
+        print("logged_run_on_langsmith", logged_run_on_langsmith)
+
+        print("fields in logged_run_on_langsmith", logged_run_on_langsmith.keys())
+
+        input_fields_on_langsmith = logged_run_on_langsmith.get("inputs")
+
+        extra_fields_on_langsmith = logged_run_on_langsmith.get("extra").get(
+            "invocation_params"
         )
-        for chunk in response:
-            continue
-        time.sleep(3)
+
+        assert logged_run_on_langsmith.get("run_type") == "llm"
+        print("\nLogged INPUT ON LANGSMITH", input_fields_on_langsmith)
+
+        print("\nextra fields on langsmith", extra_fields_on_langsmith)
+
+        assert isinstance(input_fields_on_langsmith, dict)
     except Exception as e:
         print(e)
+        pytest.fail(f"Error occurred: {e}")
-
-
-# test_langsmith_logging_with_streaming_and_metadata()
diff --git a/litellm/utils.py b/litellm/utils.py
index b9c3f983ca..a02a276b77 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -417,6 +417,21 @@ def function_setup(
                     # we only support async dynamo db logging for acompletion/aembedding since that's used on proxy
                     litellm._async_success_callback.append(callback)
                     removed_async_items.append(index)
+                elif callback == "langsmith":
+                    callback_class = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
+                        callback, internal_usage_cache=None, llm_router=None
+                    )
+
+                    # don't double add a callback
+                    if not any(
+                        isinstance(cb, type(callback_class)) for cb in litellm.callbacks
+                    ):
+                        litellm.callbacks.append(callback_class)  # type: ignore
+                        litellm.input_callback.append(callback_class)  # type: ignore
+                        litellm.success_callback.append(callback_class)  # type: ignore
+                        litellm.failure_callback.append(callback_class)  # type: ignore
+                        litellm._async_success_callback.append(callback_class)  # type: ignore
+                        litellm._async_failure_callback.append(callback_class)  # type: ignore
 
         # Pop the async items from success_callback in reverse order to avoid index issues
         for index in reversed(removed_async_items):
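With this wiring, passing `"langsmith"` in `litellm.callbacks` (or `success_callback`) is resolved once into a `LangsmithLogger` instance and registered on every callback list. A minimal SDK-side usage sketch (the model and run id are placeholders; `LANGSMITH_API_KEY` is assumed to be set in the environment):

```python
import asyncio
import uuid

import litellm

# "langsmith" resolves to a LangsmithLogger via _init_custom_logger_compatible_class
litellm.callbacks = ["langsmith"]


async def main():
    run_id = str(uuid.uuid4())
    # metadata["id"] becomes the LangSmith run id (see _prepare_log_data)
    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "what llm are u"}],
        mock_response="This is a mock response",
        metadata={"id": run_id},
    )
    print(response)


asyncio.run(main())
```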