Merge pull request #4607 from maamalama/helicone-cohere

Helicone Headers & Cohere support
Krish Dholakia, 2024-07-11 22:01:44 -07:00, committed by GitHub
commit 5ad341d0ff
3 changed files with 158 additions and 26 deletions


@@ -4,11 +4,12 @@ import dotenv, os
 import requests  # type: ignore
 import litellm
 import traceback
+from litellm._logging import verbose_logger
 
 
 class HeliconeLogger:
     # Class variables or attributes
-    helicone_model_list = ["gpt", "claude"]
+    helicone_model_list = ["gpt", "claude", "command-r", "command-r-plus", "command-light", "command-medium", "command-medium-beta", "command-xlarge-nightly", "command-nightly"]
 
     def __init__(self):
         # Instance variables
@@ -38,14 +39,48 @@ class HeliconeLogger:
         return claude_provider_request, claude_response_obj
 
+    @staticmethod
+    def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
+        """
+        Adds metadata from proxy request headers to Helicone logging if keys start
+        with "helicone_", overwriting any matching keys already present in
+        litellm_params.metadata.
+
+        For example, to attach a custom property to your request, send
+        `headers: { ..., helicone_property_something: 1234 }` via the proxy request.
+        """
+        if litellm_params is None:
+            return metadata
+
+        if litellm_params.get("proxy_server_request") is None:
+            return metadata
+
+        if metadata is None:
+            metadata = {}
+
+        proxy_headers = (
+            litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
+        )
+
+        for header_key in proxy_headers:
+            if header_key.startswith("helicone_"):
+                metadata[header_key] = proxy_headers.get(header_key)
+
+        return metadata
+
     def log_success(
-        self, model, messages, response_obj, start_time, end_time, print_verbose
+        self, model, messages, response_obj, start_time, end_time, print_verbose, kwargs
     ):
         # Method definition
         try:
             print_verbose(
                 f"Helicone Logging - Enters logging function for model {model}"
             )
+            litellm_params = kwargs.get("litellm_params", {})
+            litellm_call_id = kwargs.get("litellm_call_id", None)
+            metadata = (
+                litellm_params.get("metadata", {}) or {}
+            )
+            metadata = self.add_metadata_from_header(litellm_params, metadata)
             model = (
                 model
                 if any(
@@ -85,11 +120,13 @@ class HeliconeLogger:
             end_time_milliseconds = int(
                 (end_time.timestamp() - end_time_seconds) * 1000
             )
+            meta = {"Helicone-Auth": f"Bearer {self.key}"}
+            meta.update(metadata)
             data = {
                 "providerRequest": {
                     "url": self.provider_url,
                     "json": provider_request,
-                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
+                    "meta": meta,
                 },
                 "providerResponse": providerResponse,
                 "timing": {


@@ -811,6 +811,7 @@ class Logging:
                     print_verbose("reaches helicone for logging!")
                     model = self.model
                     messages = self.model_call_details["input"]
+                    kwargs = self.model_call_details
                     heliconeLogger.log_success(
                         model=model,
                         messages=messages,
@@ -818,6 +819,7 @@ class Logging:
                         start_time=start_time,
                         end_time=end_time,
                         print_verbose=print_verbose,
+                        kwargs=kwargs,
                     )
                 if callback == "langfuse":
                     global langFuseLogger
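
These two hunks pass the full model_call_details dict into log_success as kwargs, which is what lets caller-supplied metadata reach Helicone. A hedged end-to-end sketch of the behavior this enables (litellm.completion and its metadata parameter are standard litellm API; the Cohere model name comes from the expanded helicone_model_list above):

    import litellm

    litellm.success_callback = ["helicone"]  # needs HELICONE_API_KEY in the env

    response = litellm.completion(
        model="command-r",  # now matched by helicone_model_list; needs COHERE_API_KEY
        messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
        metadata={"Helicone-Property-Conversation": "support_issue"},
    )
    # success_handler passes kwargs=self.model_call_details to log_success, so the
    # metadata above lands in providerRequest.meta next to the Helicone-Auth header.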


@@ -1,31 +1,124 @@
 # #### What this tests ####
 # # This tests if logging to the helicone integration actually works
+import asyncio
+import copy
+import logging
+import os
+import sys
+import time
+from typing import Any
+from unittest.mock import MagicMock, patch
-# import sys, os
-# import traceback
-# import pytest
+
+logging.basicConfig(level=logging.DEBUG)
+sys.path.insert(0, os.path.abspath("../.."))
-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path
-# import litellm
-# from litellm import embedding, completion
+
+import litellm
+from litellm import completion
-# litellm.success_callback = ["helicone"]
+
+litellm.num_retries = 3
+litellm.success_callback = ["helicone"]
+os.environ["HELICONE_DEBUG"] = "True"
+os.environ["LITELLM_LOG"] = "DEBUG"
-# litellm.set_verbose = True
+
+import pytest
-# user_message = "Hello, how are you?"
-# messages = [{"content": user_message, "role": "user"}]
+
+
+def pre_helicone_setup():
+    """
+    Set up file logging for the Helicone tests.
+    """
+    import logging
+
+    logging.basicConfig(filename="helicone.log", level=logging.DEBUG)
+    logger = logging.getLogger()
+
+    file_handler = logging.FileHandler("helicone.log", mode="w")
+    file_handler.setLevel(logging.DEBUG)
+
+    logger.addHandler(file_handler)
+    return
-# # openai call
-# response = completion(
-#     model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
-# )
+
+
+def test_helicone_logging_async():
+    try:
+        pre_helicone_setup()
+        litellm.success_callback = []
+        start_time_empty_callback = asyncio.run(make_async_calls())
+        print("done with no callback test")
-# # cohere call
-# response = completion(
-#     model="command-nightly",
-#     messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
-# )
+
+        print("starting helicone test")
+        litellm.success_callback = ["helicone"]
+        start_time_helicone = asyncio.run(make_async_calls())
+        print("done with helicone test")
+
+        print(f"Time taken with success_callback='helicone': {start_time_helicone}")
+        print(f"Time taken with empty success_callback: {start_time_empty_callback}")
+
+        assert abs(start_time_helicone - start_time_empty_callback) < 1
+
+    except litellm.Timeout as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+
+
+async def make_async_calls(metadata=None, **completion_kwargs):
+    tasks = []
+    for _ in range(5):
+        tasks.append(create_async_task())
+
+    start_time = asyncio.get_event_loop().time()
+
+    responses = await asyncio.gather(*tasks)
+
+    for idx, response in enumerate(responses):
+        print(f"Response from Task {idx + 1}: {response}")
+
+    total_time = asyncio.get_event_loop().time() - start_time
+
+    return total_time
+
+
+def create_async_task(**completion_kwargs):
+    completion_args = {
+        "model": "azure/chatgpt-v-2",
+        "api_version": "2024-02-01",
+        "messages": [{"role": "user", "content": "This is a test"}],
+        "max_tokens": 5,
+        "temperature": 0.7,
+        "timeout": 5,
+        "user": "helicone_latency_test_user",
+        "mock_response": "It's simple to use and easy to get started",
+    }
+    completion_args.update(completion_kwargs)
+    return asyncio.create_task(litellm.acompletion(**completion_args))
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    condition=not os.environ.get("OPENAI_API_KEY", False),
+    reason="Authentication missing for openai",
+)
+async def test_helicone_logging_metadata():
+    import uuid
+
+    litellm.success_callback = ["helicone"]
+
+    request_id = str(uuid.uuid4())
+    trace_common_metadata = {"Helicone-Property-Request-Id": request_id}
+
+    metadata = copy.deepcopy(trace_common_metadata)
+    metadata["Helicone-Property-Conversation"] = "support_issue"
+    metadata["Helicone-Auth"] = os.getenv("HELICONE_API_KEY")
+    response = await create_async_task(
+        model="gpt-3.5-turbo",
+        mock_response="Hey! how's it going?",
+        messages=[
+            {
+                "role": "user",
+                "content": f"{request_id}",
+            }
+        ],
+        max_tokens=100,
+        temperature=0.2,
+        metadata=copy.deepcopy(metadata),
+    )
+    print(response)
+    time.sleep(3)
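
The latency test above asserts that enabling the Helicone callback adds less than one second across five mocked calls (mock_response short-circuits the provider request), while the metadata test is gated on OPENAI_API_KEY. A sketch for running the file locally, assuming the module name below (the commit view does not show file paths):

    import pytest

    # -s streams print output, -x stops on first failure; the file name is an
    # assumption based on what this diff tests.
    pytest.main(["-s", "-x", "test_helicone_integration.py"])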