Merge pull request #4607 from maamalama/helicone-cohere

Helicone Headers & Cohere support
Krish Dholakia 2024-07-11 22:01:44 -07:00 committed by GitHub
commit 5ad341d0ff
3 changed files with 158 additions and 26 deletions

View file

@@ -4,11 +4,12 @@ import dotenv, os
 import requests  # type: ignore
 import litellm
 import traceback
+from litellm._logging import verbose_logger


 class HeliconeLogger:
     # Class variables or attributes
-    helicone_model_list = ["gpt", "claude"]
+    helicone_model_list = ["gpt", "claude", "command-r", "command-r-plus", "command-light", "command-medium", "command-medium-beta", "command-xlarge-nightly", "command-nightly"]

     def __init__(self):
         # Instance variables
@@ -37,15 +38,49 @@ class HeliconeLogger:
         }
         return claude_provider_request, claude_response_obj

+    @staticmethod
+    def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
+        """
+        Adds metadata from proxy request headers to Helicone logging if keys start
+        with "helicone_", overwriting litellm_params.metadata entries if already present.
+
+        For example, to attach a custom property to your request, send
+        `headers: { ..., helicone_property_something: 1234 }` with the proxy request.
+        """
+        if litellm_params is None:
+            return metadata
+
+        if litellm_params.get("proxy_server_request") is None:
+            return metadata
+
+        if metadata is None:
+            metadata = {}
+
+        proxy_headers = (
+            litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
+        )
+
+        for header_key in proxy_headers:
+            if header_key.startswith("helicone_"):
+                metadata[header_key] = proxy_headers.get(header_key)
+
+        return metadata
+
     def log_success(
-        self, model, messages, response_obj, start_time, end_time, print_verbose
+        self, model, messages, response_obj, start_time, end_time, print_verbose, kwargs
     ):
         # Method definition
         try:
             print_verbose(
                 f"Helicone Logging - Enters logging function for model {model}"
             )
+            litellm_params = kwargs.get("litellm_params", {})
+            litellm_call_id = kwargs.get("litellm_call_id", None)
+            metadata = litellm_params.get("metadata", {}) or {}
+            metadata = self.add_metadata_from_header(litellm_params, metadata)
             model = (
                 model
                 if any(
@@ -85,11 +120,13 @@ class HeliconeLogger:
             end_time_milliseconds = int(
                 (end_time.timestamp() - end_time_seconds) * 1000
             )
+            meta = {"Helicone-Auth": f"Bearer {self.key}"}
+            meta.update(metadata)
             data = {
                 "providerRequest": {
                     "url": self.provider_url,
                     "json": provider_request,
-                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
+                    "meta": meta,
                 },
                 "providerResponse": providerResponse,
                 "timing": {

View file

@@ -811,6 +811,7 @@ class Logging:
                 print_verbose("reaches helicone for logging!")
                 model = self.model
                 messages = self.model_call_details["input"]
+                kwargs = self.model_call_details
                 heliconeLogger.log_success(
                     model=model,
                     messages=messages,
@@ -818,6 +819,7 @@ class Logging:
                     start_time=start_time,
                     end_time=end_time,
                     print_verbose=print_verbose,
+                    kwargs=kwargs,
                 )
             if callback == "langfuse":
                 global langFuseLogger
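
From a client's point of view, this wiring means a request through the LiteLLM proxy can tag Helicone custom properties with nothing but headers. A hedged end-to-end sketch using the OpenAI SDK; the proxy URL, API key, and model alias below are assumptions, not part of this commit:

import openai

# Assumes a LiteLLM proxy running locally with success_callback: ["helicone"].
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    # Any header starting with "helicone_" is forwarded into Helicone
    # metadata by add_metadata_from_header on the logging path above.
    extra_headers={"helicone_property_session": "demo-session-1"},
)
print(response.choices[0].message.content)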

View file

@@ -1,31 +1,124 @@
-# #### What this tests ####
-# # This tests if logging to the helicone integration actually works
-
-# import sys, os
-# import traceback
-# import pytest
-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path
-# import litellm
-# from litellm import embedding, completion
-# litellm.success_callback = ["helicone"]
-
-# litellm.set_verbose = True
-
-# user_message = "Hello, how are you?"
-# messages = [{"content": user_message, "role": "user"}]
-
-# # openai call
-# response = completion(
-#     model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
-# )
-
-# # cohere call
-# response = completion(
-#     model="command-nightly",
-#     messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
-# )
+import asyncio
+import copy
+import logging
+import os
+import sys
+import time
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+logging.basicConfig(level=logging.DEBUG)
+sys.path.insert(0, os.path.abspath("../.."))
+
+import litellm
+from litellm import completion
+
+litellm.num_retries = 3
+litellm.success_callback = ["helicone"]
+os.environ["HELICONE_DEBUG"] = "True"
+os.environ["LITELLM_LOG"] = "DEBUG"
+
+import pytest
+
+
+def pre_helicone_setup():
+    """
+    Route debug logs to a local helicone.log file for inspection.
+    """
+    import logging
+
+    logging.basicConfig(filename="helicone.log", level=logging.DEBUG)
+    logger = logging.getLogger()
+
+    file_handler = logging.FileHandler("helicone.log", mode="w")
+    file_handler.setLevel(logging.DEBUG)
+    logger.addHandler(file_handler)
+    return
+
+
+def test_helicone_logging_async():
+    try:
+        pre_helicone_setup()
+        litellm.success_callback = []
+        start_time_empty_callback = asyncio.run(make_async_calls())
+        print("done with no callback test")
+
+        print("starting helicone test")
+        litellm.success_callback = ["helicone"]
+        start_time_helicone = asyncio.run(make_async_calls())
+        print("done with helicone test")
+
+        print(f"Time taken with success_callback='helicone': {start_time_helicone}")
+        print(f"Time taken with empty success_callback: {start_time_empty_callback}")
+        assert abs(start_time_helicone - start_time_empty_callback) < 1
+    except litellm.Timeout:
+        pass
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+
+
+async def make_async_calls(metadata=None, **completion_kwargs):
+    tasks = []
+    for _ in range(5):
+        tasks.append(create_async_task())
+
+    start_time = asyncio.get_event_loop().time()
+    responses = await asyncio.gather(*tasks)
+
+    for idx, response in enumerate(responses):
+        print(f"Response from Task {idx + 1}: {response}")
+
+    total_time = asyncio.get_event_loop().time() - start_time
+    return total_time
+
+
+def create_async_task(**completion_kwargs):
+    completion_args = {
+        "model": "azure/chatgpt-v-2",
+        "api_version": "2024-02-01",
+        "messages": [{"role": "user", "content": "This is a test"}],
+        "max_tokens": 5,
+        "temperature": 0.7,
+        "timeout": 5,
+        "user": "helicone_latency_test_user",
+        "mock_response": "It's simple to use and easy to get started",
+    }
+    completion_args.update(completion_kwargs)
+    return asyncio.create_task(litellm.acompletion(**completion_args))
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    condition=not os.environ.get("OPENAI_API_KEY", False),
+    reason="Authentication missing for openai",
+)
+async def test_helicone_logging_metadata():
+    import uuid
+
+    litellm.success_callback = ["helicone"]
+
+    request_id = str(uuid.uuid4())
+    trace_common_metadata = {"Helicone-Property-Request-Id": request_id}
+    metadata = copy.deepcopy(trace_common_metadata)
+    metadata["Helicone-Property-Conversation"] = "support_issue"
+    metadata["Helicone-Auth"] = os.getenv("HELICONE_API_KEY")
+    response = await create_async_task(
+        model="gpt-3.5-turbo",
+        mock_response="Hey! how's it going?",
+        messages=[
+            {
+                "role": "user",
+                "content": f"{request_id}",
+            }
+        ],
+        max_tokens=100,
+        temperature=0.2,
+        metadata=copy.deepcopy(metadata),
+    )
+    print(response)
+    time.sleep(3)
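
The metadata test above exercises the same tagging without the proxy: Helicone properties can be passed straight through the completion call's `metadata` argument. A minimal standalone sketch, assuming `HELICONE_API_KEY` is set in the environment; `mock_response` keeps it from hitting a real provider:

import asyncio

import litellm

litellm.success_callback = ["helicone"]

async def main():
    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "ping"}],
        mock_response="pong",  # no real provider call is made
        metadata={"Helicone-Property-Conversation": "support_issue"},
    )
    print(response.choices[0].message.content)

asyncio.run(main())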