diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py
index 8ea18a7d5b..f0666431a6 100644
--- a/litellm/integrations/helicone.py
+++ b/litellm/integrations/helicone.py
@@ -4,11 +4,12 @@ import dotenv, os
 import requests  # type: ignore
 import litellm
 import traceback
+from litellm._logging import verbose_logger
 
 
 class HeliconeLogger:
     # Class variables or attributes
-    helicone_model_list = ["gpt", "claude"]
+    helicone_model_list = ["gpt", "claude", "command-r", "command-r-plus", "command-light", "command-medium", "command-medium-beta", "command-xlarge-nightly", "command-nightly"]
 
     def __init__(self):
         # Instance variables
@@ -37,15 +38,49 @@ class HeliconeLogger:
         }
 
         return claude_provider_request, claude_response_obj
 
+    @staticmethod
+    def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
+        """
+        Adds metadata from proxy request headers to Helicone logging if keys start with "helicone_",
+        overwriting existing keys in litellm_params.metadata.
+
+        For example, if you want to add a custom property to your request, send
+        `headers: { ..., helicone_property_something: 1234 }` via the proxy request.
+        """
+        if litellm_params is None:
+            return metadata
+
+        if litellm_params.get("proxy_server_request") is None:
+            return metadata
+
+        if metadata is None:
+            metadata = {}
+
+        proxy_headers = (
+            litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
+        )
+
+        for header_key in proxy_headers:
+            if header_key.startswith("helicone_"):
+                metadata[header_key] = proxy_headers.get(header_key)
+
+        return metadata
 
     def log_success(
-        self, model, messages, response_obj, start_time, end_time, print_verbose
+        self, model, messages, response_obj, start_time, end_time, print_verbose, kwargs
     ):
         # Method definition
         try:
             print_verbose(
                 f"Helicone Logging - Enters logging function for model {model}"
             )
+            litellm_params = kwargs.get("litellm_params", {})
+            litellm_call_id = kwargs.get("litellm_call_id", None)
+            metadata = (
+                litellm_params.get("metadata", {}) or {}
+            )
+            metadata = self.add_metadata_from_header(litellm_params, metadata)
             model = (
                 model
                 if any(
@@ -85,11 +120,13 @@ class HeliconeLogger:
             end_time_milliseconds = int(
                 (end_time.timestamp() - end_time_seconds) * 1000
             )
+            meta = {"Helicone-Auth": f"Bearer {self.key}"}
+            meta.update(metadata)
             data = {
                 "providerRequest": {
                     "url": self.provider_url,
                     "json": provider_request,
-                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
+                    "meta": meta,
                 },
                 "providerResponse": providerResponse,
                 "timing": {
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 0edc90325d..5f7957a465 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -811,6 +811,7 @@ class Logging:
                     print_verbose("reaches helicone for logging!")
                     model = self.model
                     messages = self.model_call_details["input"]
+                    kwargs = self.model_call_details
                     heliconeLogger.log_success(
                         model=model,
                         messages=messages,
@@ -818,6 +819,7 @@ class Logging:
                         start_time=start_time,
                         end_time=end_time,
                         print_verbose=print_verbose,
+                        kwargs=kwargs,
                     )
                 if callback == "langfuse":
                     global langFuseLogger
diff --git a/litellm/tests/test_helicone_integration.py b/litellm/tests/test_helicone_integration.py
index 0348560009..8e1f0a94e9 100644
--- a/litellm/tests/test_helicone_integration.py
+++ b/litellm/tests/test_helicone_integration.py
@@ -1,31 +1,124 @@
-# #### What this tests ####
-# # This tests if logging to the helicone integration actually works
+import asyncio
+import copy
+import logging
+import os
+import sys
+import time
+from typing import Any
+from unittest.mock import MagicMock, patch
 
-# import sys, os
-# import traceback
-# import pytest
+logging.basicConfig(level=logging.DEBUG)
+sys.path.insert(0, os.path.abspath("../.."))
 
-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path
-# import litellm
-# from litellm import embedding, completion
+import litellm
+from litellm import completion
 
-# litellm.success_callback = ["helicone"]
+litellm.num_retries = 3
+litellm.success_callback = ["helicone"]
+os.environ["HELICONE_DEBUG"] = "True"
+os.environ["LITELLM_LOG"] = "DEBUG"
 
-# litellm.set_verbose = True
+import pytest
 
-# user_message = "Hello, how are you?"
-# messages = [{"content": user_message, "role": "user"}]
 
+def pre_helicone_setup():
+    """
+    Set up the logging for the 'pre_helicone_setup' function.
+    """
+    import logging
+
+    logging.basicConfig(filename="helicone.log", level=logging.DEBUG)
+    logger = logging.getLogger()
+
+    file_handler = logging.FileHandler("helicone.log", mode="w")
+    file_handler.setLevel(logging.DEBUG)
+    logger.addHandler(file_handler)
+    return
 
-# # openai call
-# response = completion(
-#     model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
-# )
+
+def test_helicone_logging_async():
+    try:
+        pre_helicone_setup()
+        litellm.success_callback = []
+        start_time_empty_callback = asyncio.run(make_async_calls())
+        print("done with no callback test")
 
-# # cohere call
-# response = completion(
-#     model="command-nightly",
-#     messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
-# )
+        print("starting helicone test")
+        litellm.success_callback = ["helicone"]
+        start_time_helicone = asyncio.run(make_async_calls())
+        print("done with helicone test")
+
+        print(f"Time taken with success_callback='helicone': {start_time_helicone}")
+        print(f"Time taken with empty success_callback: {start_time_empty_callback}")
+
+        assert abs(start_time_helicone - start_time_empty_callback) < 1
+
+    except litellm.Timeout as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+
+
+async def make_async_calls(metadata=None, **completion_kwargs):
+    tasks = []
+    for _ in range(5):
+        tasks.append(create_async_task(**completion_kwargs))
+
+    start_time = asyncio.get_event_loop().time()
+
+    responses = await asyncio.gather(*tasks)
+
+    for idx, response in enumerate(responses):
+        print(f"Response from Task {idx + 1}: {response}")
+
+    total_time = asyncio.get_event_loop().time() - start_time
+
+    return total_time
+
+
+def create_async_task(**completion_kwargs):
+    completion_args = {
+        "model": "azure/chatgpt-v-2",
+        "api_version": "2024-02-01",
+        "messages": [{"role": "user", "content": "This is a test"}],
+        "max_tokens": 5,
+        "temperature": 0.7,
+        "timeout": 5,
+        "user": "helicone_latency_test_user",
+        "mock_response": "It's simple to use and easy to get started",
+    }
+    completion_args.update(completion_kwargs)
+    return asyncio.create_task(litellm.acompletion(**completion_args))
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    condition=not os.environ.get("OPENAI_API_KEY", False),
+    reason="Authentication missing for openai",
+)
+async def test_helicone_logging_metadata():
+    import uuid
+
+    litellm.success_callback = ["helicone"]
+
+    request_id = str(uuid.uuid4())
+    trace_common_metadata = {
+        "Helicone-Property-Request-Id": request_id
+    }
+
+    metadata = copy.deepcopy(trace_common_metadata)
+    metadata["Helicone-Property-Conversation"] = "support_issue"
+    metadata["Helicone-Auth"] = os.getenv("HELICONE_API_KEY")
+    response = await create_async_task(
+        model="gpt-3.5-turbo",
+        mock_response="Hey! how's it going?",
+        messages=[
+            {
+                "role": "user",
+                "content": f"{request_id}",
+            }
+        ],
+        max_tokens=100,
+        temperature=0.2,
+        metadata=copy.deepcopy(metadata),
+    )
+    print(response)
+
+    time.sleep(3)