Merge pull request #4830 from BerriAI/litellm_braintrust_integration

Braintrust logging integration
This commit is contained in:
Krish Dholakia 2024-07-22 22:40:47 -07:00 committed by GitHub
commit b69b7503db
8 changed files with 586 additions and 3 deletions


@@ -0,0 +1,147 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# ⚡️ Braintrust - Evals + Logging
[Braintrust](https://www.braintrust.dev/) manages evaluations, logging, prompt playground, and data management for AI products.
## Quick Start
```python
# pip install litellm
import litellm
import os
# set env
os.environ["BRAINTRUST_API_KEY"] = ""
os.environ["OPENAI_API_KEY"] = ""
# set braintrust as a callback, litellm will send the data to braintrust
litellm.callbacks = ["braintrust"]
# openai call
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{"role": "user", "content": "Hi 👋 - i'm openai"}
]
)
```
## OpenAI Proxy Usage
1. Add keys to env
```env
BRAINTRUST_API_KEY=""
```
2. Add braintrust to callbacks
```yaml
model_list:
- model_name: gpt-3.5-turbo
litellm_params:
model: gpt-3.5-turbo
api_key: os.environ/OPENAI_API_KEY
litellm_settings:
callbacks: ["braintrust"]
```
3. Test it!
```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-3.5-turbo",
"messages": [
{ "role": "system", "content": "Use your tools smartly"},
{ "role": "user", "content": "What time is it now? Use your tool"}
]
}'
```
## Advanced - pass Project ID
<Tabs>
<TabItem value="sdk" label="SDK">
```python
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{"role": "user", "content": "Hi 👋 - i'm openai"}
],
metadata={
"project_id": "my-special-project"
}
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
**Curl**
```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-3.5-turbo",
"messages": [
{ "role": "system", "content": "Use your tools smartly"},
{ "role": "user", "content": "What time is it now? Use your tool"}
],
"metadata": {
"project_id": "my-special-project"
}
}'
```
**OpenAI SDK**
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params
"metadata": { # 👈 use for logging additional params (e.g. to langfuse)
"project_id": "my-special-project"
}
}
)
print(response)
```
For more examples, [**Click Here**](../proxy/user_keys.md#chatcompletions)
</TabItem>
</Tabs>
## Full API Spec
Here's everything you can pass in `metadata` for a Braintrust request:

- `braintrust_*` - any metadata field starting with `braintrust_` is passed as metadata on the logging request.
- `project_id` - set the project id for a Braintrust call. Defaults to `litellm`.
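For example, here's a minimal sketch of passing both fields in a single SDK call. The `my-special-project` id and the `braintrust_run_id` key are hypothetical values for illustration:
```python
import os
import litellm

os.environ["BRAINTRUST_API_KEY"] = ""
os.environ["OPENAI_API_KEY"] = ""

# route logs to braintrust
litellm.callbacks = ["braintrust"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
    metadata={
        # log under this Braintrust project instead of the default `litellm` project
        "project_id": "my-special-project",
        # hypothetical example: `braintrust_`-prefixed fields are forwarded as metadata
        "braintrust_run_id": "run-123",
    },
)
```
On the proxy, the same fields can be sent in the request body's `metadata`, as shown in the examples above.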


@@ -1,4 +1,4 @@
# 🧠 Helicone - OSS LLM Observability Platform
# 🧊 Helicone - OSS LLM Observability Platform
:::tip


@@ -199,9 +199,10 @@ const sidebars = {
"observability/raw_request_response",
"observability/custom_callback",
"observability/scrub_data",
"observability/helicone_integration",
"observability/braintrust",
"observability/sentry",
"observability/lago",
"observability/helicone_integration",
"observability/openmeter",
"observability/promptlayer_integration",
"observability/wandb_integration",


@@ -44,6 +44,7 @@ _custom_logger_compatible_callbacks_literal = Literal[
"dynamic_rate_limiter",
"langsmith",
"galileo",
"braintrust",
"arize",
]
_known_custom_logger_compatible_callbacks: List = list(


@@ -0,0 +1,369 @@
# What is this?
## Log success + failure events to Braintrust
import copy
import json
import os
import threading
import traceback
import uuid
from typing import Literal, Optional
import dotenv
import httpx
import litellm
from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.utils import get_formatted_prompt
global_braintrust_http_handler = AsyncHTTPHandler()
global_braintrust_sync_http_handler = HTTPHandler()
API_BASE = "https://api.braintrustdata.com/v1"
def get_utc_datetime():
import datetime as dt
from datetime import datetime
if hasattr(dt, "UTC"):
return datetime.now(dt.UTC) # type: ignore
else:
return datetime.utcnow() # type: ignore
class BraintrustLogger(CustomLogger):
def __init__(
self, api_key: Optional[str] = None, api_base: Optional[str] = None
) -> None:
super().__init__()
self.validate_environment(api_key=api_key)
self.api_base = api_base or API_BASE
self.default_project_id = None
self.api_key: str = api_key or os.getenv("BRAINTRUST_API_KEY") # type: ignore
self.headers = {
"Authorization": "Bearer " + self.api_key,
"Content-Type": "application/json",
}
def validate_environment(self, api_key: Optional[str]):
"""
Expects
BRAINTRUST_API_KEY
in the environment
"""
missing_keys = []
if api_key is None and os.getenv("BRAINTRUST_API_KEY", None) is None:
missing_keys.append("BRAINTRUST_API_KEY")
if len(missing_keys) > 0:
raise Exception("Missing keys={} in environment.".format(missing_keys))
@staticmethod
def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
"""
Adds metadata from proxy request headers to Braintrust logging if keys start with "braintrust"
and overwrites litellm_params.metadata if already included.
For example, to attach extra metadata via header, send
`headers: { ..., braintrust_custom_field: your-value }` via the proxy request; the
"braintrust" prefix is stripped from the key before it is logged.
"""
if litellm_params is None:
return metadata
if litellm_params.get("proxy_server_request") is None:
return metadata
if metadata is None:
metadata = {}
proxy_headers = (
litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
)
for metadata_param_key in proxy_headers:
if metadata_param_key.startswith("braintrust"):
trace_param_key = metadata_param_key.replace("braintrust", "", 1)
if trace_param_key in metadata:
verbose_logger.warning(
f"Overwriting Braintrust `{trace_param_key}` from request header"
)
else:
verbose_logger.debug(
f"Found Braintrust `{trace_param_key}` in request header"
)
metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
return metadata
async def create_default_project_and_experiment(self):
project = await global_braintrust_http_handler.post(
f"{self.api_base}/project", headers=self.headers, json={"name": "litellm"}
)
project_dict = project.json()
self.default_project_id = project_dict["id"]
def create_sync_default_project_and_experiment(self):
project = global_braintrust_sync_http_handler.post(
f"{self.api_base}/project", headers=self.headers, json={"name": "litellm"}
)
project_dict = project.json()
self.default_project_id = project_dict["id"]
def log_success_event(self, kwargs, response_obj, start_time, end_time):
verbose_logger.debug("REACHES BRAINTRUST SUCCESS")
try:
litellm_call_id = kwargs.get("litellm_call_id")
project_id = kwargs.get("project_id", None)
if project_id is None:
if self.default_project_id is None:
self.create_sync_default_project_and_experiment()
project_id = self.default_project_id
prompt = {"messages": kwargs.get("messages")}
if response_obj is not None and (
kwargs.get("call_type", None) == "embedding"
or isinstance(response_obj, litellm.EmbeddingResponse)
):
input = prompt
output = None
elif response_obj is not None and isinstance(
response_obj, litellm.ModelResponse
):
input = prompt
output = response_obj["choices"][0]["message"].json()
elif response_obj is not None and isinstance(
response_obj, litellm.TextCompletionResponse
):
input = prompt
output = response_obj.choices[0].text
elif response_obj is not None and isinstance(
response_obj, litellm.ImageResponse
):
input = prompt
output = response_obj["data"]
litellm_params = kwargs.get("litellm_params", {})
metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata)
clean_metadata = {}
try:
metadata = copy.deepcopy(
metadata
) # Avoid modifying the original metadata
except Exception:  # some metadata values may not be deep-copyable
new_metadata = {}
for key, value in metadata.items():
if (
isinstance(value, list)
or isinstance(value, dict)
or isinstance(value, str)
or isinstance(value, int)
or isinstance(value, float)
):
new_metadata[key] = copy.deepcopy(value)
metadata = new_metadata
tags = []
if isinstance(metadata, dict):
for key, value in metadata.items():
# generate tags - default tag keys configured on the LiteLLM Proxy (reuses litellm._langfuse_default_tags)
if (
litellm._langfuse_default_tags is not None
and isinstance(litellm._langfuse_default_tags, list)
and key in litellm._langfuse_default_tags
):
tags.append(f"{key}:{value}")
# clean litellm metadata before logging
if key in [
"headers",
"endpoint",
"caching_groups",
"previous_models",
]:
continue
else:
clean_metadata[key] = value
cost = kwargs.get("response_cost", None)
if cost is not None:
clean_metadata["litellm_response_cost"] = cost
metrics: Optional[dict] = None
if (
response_obj is not None
and hasattr(response_obj, "usage")
and isinstance(response_obj.usage, litellm.Usage)
):
metrics = {
"prompt_tokens": response_obj.usage.prompt_tokens,
"completion_tokens": response_obj.usage.completion_tokens,
"total_tokens": response_obj.usage.total_tokens,
"total_cost": cost,
}
request_data = {
"id": litellm_call_id,
"input": prompt,
"output": output,
"metadata": clean_metadata,
"tags": tags,
}
if metrics is not None:
request_data["metrics"] = metrics
try:
global_braintrust_sync_http_handler.post(
url=f"{self.api_base}/project_logs/{project_id}/insert",
json={"events": [request_data]},
headers=self.headers,
)
except httpx.HTTPStatusError as e:
raise Exception(e.response.text)
except Exception as e:
verbose_logger.error(
"Error logging to braintrust - Exception received - {}\n{}".format(
str(e), traceback.format_exc()
)
)
raise e
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
verbose_logger.debug("REACHES BRAINTRUST SUCCESS")
try:
litellm_call_id = kwargs.get("litellm_call_id")
project_id = kwargs.get("project_id", None)
if project_id is None:
if self.default_project_id is None:
await self.create_default_project_and_experiment()
project_id = self.default_project_id
prompt = {"messages": kwargs.get("messages")}
if response_obj is not None and (
kwargs.get("call_type", None) == "embedding"
or isinstance(response_obj, litellm.EmbeddingResponse)
):
input = prompt
output = None
elif response_obj is not None and isinstance(
response_obj, litellm.ModelResponse
):
input = prompt
output = response_obj["choices"][0]["message"].json()
elif response_obj is not None and isinstance(
response_obj, litellm.TextCompletionResponse
):
input = prompt
output = response_obj.choices[0].text
elif response_obj is not None and isinstance(
response_obj, litellm.ImageResponse
):
input = prompt
output = response_obj["data"]
litellm_params = kwargs.get("litellm_params", {})
metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata)
clean_metadata = {}
try:
metadata = copy.deepcopy(
metadata
) # Avoid modifying the original metadata
except Exception:  # some metadata values may not be deep-copyable
new_metadata = {}
for key, value in metadata.items():
if (
isinstance(value, list)
or isinstance(value, dict)
or isinstance(value, str)
or isinstance(value, int)
or isinstance(value, float)
):
new_metadata[key] = copy.deepcopy(value)
metadata = new_metadata
tags = []
if isinstance(metadata, dict):
for key, value in metadata.items():
# generate tags - default tag keys configured on the LiteLLM Proxy (reuses litellm._langfuse_default_tags)
if (
litellm._langfuse_default_tags is not None
and isinstance(litellm._langfuse_default_tags, list)
and key in litellm._langfuse_default_tags
):
tags.append(f"{key}:{value}")
# clean litellm metadata before logging
if key in [
"headers",
"endpoint",
"caching_groups",
"previous_models",
]:
continue
else:
clean_metadata[key] = value
cost = kwargs.get("response_cost", None)
if cost is not None:
clean_metadata["litellm_response_cost"] = cost
metrics: Optional[dict] = None
if (
response_obj is not None
and hasattr(response_obj, "usage")
and isinstance(response_obj.usage, litellm.Usage)
):
metrics = {
"prompt_tokens": response_obj.usage.prompt_tokens,
"completion_tokens": response_obj.usage.completion_tokens,
"total_tokens": response_obj.usage.total_tokens,
"total_cost": cost,
}
request_data = {
"id": litellm_call_id,
"input": prompt,
"output": output,
"metadata": clean_metadata,
"tags": tags,
}
if metrics is not None:
request_data["metrics"] = metrics
try:
await global_braintrust_http_handler.post(
url=f"{self.api_base}/project_logs/{project_id}/insert",
json={"events": [request_data]},
headers=self.headers,
)
except httpx.HTTPStatusError as e:
raise Exception(e.response.text)
except Exception as e:
verbose_logger.error(
"Error logging to braintrust - Exception received - {}\n{}".format(
str(e), traceback.format_exc()
)
)
raise e
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
return super().log_failure_event(kwargs, response_obj, start_time, end_time)


@@ -53,6 +53,7 @@ from litellm.utils import (
from ..integrations.aispend import AISpendLogger
from ..integrations.athina import AthinaLogger
from ..integrations.berrispend import BerriSpendLogger
from ..integrations.braintrust_logging import BraintrustLogger
from ..integrations.clickhouse import ClickhouseLogger
from ..integrations.custom_logger import CustomLogger
from ..integrations.datadog import DataDogLogger
@@ -1945,7 +1946,14 @@ def _init_custom_logger_compatible_class(
_openmeter_logger = OpenMeterLogger()
_in_memory_loggers.append(_openmeter_logger)
return _openmeter_logger # type: ignore
elif logging_integration == "braintrust":
for callback in _in_memory_loggers:
if isinstance(callback, BraintrustLogger):
return callback # type: ignore
braintrust_logger = BraintrustLogger()
_in_memory_loggers.append(braintrust_logger)
return braintrust_logger # type: ignore
elif logging_integration == "langsmith":
for callback in _in_memory_loggers:
if isinstance(callback, LangsmithLogger):
@@ -2056,6 +2064,10 @@ def get_custom_logger_compatible_class(
for callback in _in_memory_loggers:
if isinstance(callback, OpenMeterLogger):
return callback
elif logging_integration == "braintrust":
for callback in _in_memory_loggers:
if isinstance(callback, BraintrustLogger):
return callback
elif logging_integration == "galileo":
for callback in _in_memory_loggers:
if isinstance(callback, GalileoObserve):


@@ -6,4 +6,4 @@ model_list:
litellm_settings:
callbacks: ["logfire"]
redact_user_api_key_info: true
redact_user_api_key_info: true


@@ -0,0 +1,53 @@
# What is this?
## This tests the braintrust integration
import asyncio
import os
import random
import sys
import time
import traceback
from datetime import datetime
from dotenv import load_dotenv
from fastapi import Request
load_dotenv()
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import asyncio
import logging
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
import litellm
import litellm.integrations.braintrust_logging  # ensure the submodule is loaded before patching
from litellm.llms.custom_httpx.http_handler import HTTPHandler
def test_braintrust_logging():
import litellm
http_client = HTTPHandler()
setattr(
litellm.integrations.braintrust_logging,
"global_braintrust_sync_http_handler",
http_client,
)
with patch.object(http_client, "post", new=MagicMock()) as mock_client:
# set braintrust as a callback, litellm will send the data to braintrust
litellm.callbacks = ["braintrust"]
# openai call
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
)
mock_client.assert_called()