Merge pull request #4607 from maamalama/helicone-cohere

Helicone Headers & Cohere support
Krish Dholakia 2024-07-11 22:01:44 -07:00 committed by GitHub
commit 5ad341d0ff
3 changed files with 158 additions and 26 deletions

View file

@@ -4,11 +4,12 @@ import dotenv, os
 import requests  # type: ignore
 import litellm
 import traceback
+from litellm._logging import verbose_logger


 class HeliconeLogger:
     # Class variables or attributes
-    helicone_model_list = ["gpt", "claude"]
+    helicone_model_list = ["gpt", "claude", "command-r", "command-r-plus", "command-light", "command-medium", "command-medium-beta", "command-xlarge-nightly", "command-nightly"]

     def __init__(self):
         # Instance variables
@@ -37,15 +38,49 @@ class HeliconeLogger:
         }
         return claude_provider_request, claude_response_obj

+    @staticmethod
+    def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
+        """
+        Adds metadata from proxy request headers to Helicone logging if keys start
+        with "helicone_", overwriting litellm_params.metadata entries if already present.
+
+        For example, to attach a custom property to your request, send
+        `headers: { ..., helicone_property_something: 1234 }` with the proxy request.
+        """
+        if litellm_params is None:
+            return metadata
+
+        if litellm_params.get("proxy_server_request") is None:
+            return metadata
+
+        if metadata is None:
+            metadata = {}
+
+        proxy_headers = (
+            litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
+        )
+
+        for header_key in proxy_headers:
+            if header_key.startswith("helicone_"):
+                metadata[header_key] = proxy_headers.get(header_key)
+
+        return metadata
+
     def log_success(
-        self, model, messages, response_obj, start_time, end_time, print_verbose
+        self, model, messages, response_obj, start_time, end_time, print_verbose, kwargs
     ):
         # Method definition
         try:
             print_verbose(
                 f"Helicone Logging - Enters logging function for model {model}"
             )
+            litellm_params = kwargs.get("litellm_params", {})
+            litellm_call_id = kwargs.get("litellm_call_id", None)
+            metadata = litellm_params.get("metadata", {}) or {}
+            metadata = self.add_metadata_from_header(litellm_params, metadata)
             model = (
                 model
                 if any(
@@ -85,11 +120,13 @@ class HeliconeLogger:
             end_time_milliseconds = int(
                 (end_time.timestamp() - end_time_seconds) * 1000
             )
+            meta = {"Helicone-Auth": f"Bearer {self.key}"}
+            meta.update(metadata)
             data = {
                 "providerRequest": {
                     "url": self.provider_url,
                     "json": provider_request,
-                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
+                    "meta": meta,
                 },
                 "providerResponse": providerResponse,
                 "timing": {

View file

@@ -811,6 +811,7 @@ class Logging:
                 print_verbose("reaches helicone for logging!")
                 model = self.model
                 messages = self.model_call_details["input"]
+                kwargs = self.model_call_details
                 heliconeLogger.log_success(
                     model=model,
                     messages=messages,
@@ -818,6 +819,7 @@ class Logging:
                     start_time=start_time,
                     end_time=end_time,
                     print_verbose=print_verbose,
+                    kwargs=kwargs,
                 )
             if callback == "langfuse":
                 global langFuseLogger
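
From a client's point of view, this wiring means a request through the LiteLLM proxy can tag Helicone custom properties with nothing but headers. A hedged end-to-end sketch using the OpenAI SDK; the proxy URL, API key, and model alias below are assumptions, not part of this commit:

import openai

# Assumes a LiteLLM proxy running locally with success_callback: ["helicone"].
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    # Any header starting with "helicone_" is forwarded into Helicone
    # metadata by add_metadata_from_header on the logging path above.
    extra_headers={"helicone_property_session": "demo-session-1"},
)
print(response.choices[0].message.content)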

View file

@@ -1,31 +1,124 @@
-# #### What this tests ####
-# # This tests if logging to the helicone integration actually works
-
-# import sys, os
-# import traceback
-# import pytest
-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path
-# import litellm
-# from litellm import embedding, completion
-# litellm.success_callback = ["helicone"]
-
-# litellm.set_verbose = True
-
-# user_message = "Hello, how are you?"
-# messages = [{"content": user_message, "role": "user"}]
-
-# # openai call
-# response = completion(
-#     model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
-# )
-
-# # cohere call
-# response = completion(
-#     model="command-nightly",
-#     messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
-# )
+import asyncio
+import copy
+import logging
+import os
+import sys
+import time
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+logging.basicConfig(level=logging.DEBUG)
+sys.path.insert(0, os.path.abspath("../.."))
+
+import litellm
+from litellm import completion
+
+litellm.num_retries = 3
+litellm.success_callback = ["helicone"]
+os.environ["HELICONE_DEBUG"] = "True"
+os.environ["LITELLM_LOG"] = "DEBUG"
+
+import pytest
+
+
+def pre_helicone_setup():
+    """
+    Route debug logs to a local helicone.log file for inspection.
+    """
+    import logging
+
+    logging.basicConfig(filename="helicone.log", level=logging.DEBUG)
+    logger = logging.getLogger()
+
+    file_handler = logging.FileHandler("helicone.log", mode="w")
+    file_handler.setLevel(logging.DEBUG)
+    logger.addHandler(file_handler)
+    return
+
+
+def test_helicone_logging_async():
+    try:
+        pre_helicone_setup()
+        litellm.success_callback = []
+        start_time_empty_callback = asyncio.run(make_async_calls())
+        print("done with no callback test")
+
+        print("starting helicone test")
+        litellm.success_callback = ["helicone"]
+        start_time_helicone = asyncio.run(make_async_calls())
+        print("done with helicone test")
+
+        print(f"Time taken with success_callback='helicone': {start_time_helicone}")
+        print(f"Time taken with empty success_callback: {start_time_empty_callback}")
+        assert abs(start_time_helicone - start_time_empty_callback) < 1
+    except litellm.Timeout:
+        pass
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+
+
+async def make_async_calls(metadata=None, **completion_kwargs):
+    tasks = []
+    for _ in range(5):
+        tasks.append(create_async_task())
+
+    start_time = asyncio.get_event_loop().time()
+    responses = await asyncio.gather(*tasks)
+
+    for idx, response in enumerate(responses):
+        print(f"Response from Task {idx + 1}: {response}")
+
+    total_time = asyncio.get_event_loop().time() - start_time
+    return total_time
+
+
+def create_async_task(**completion_kwargs):
+    completion_args = {
+        "model": "azure/chatgpt-v-2",
+        "api_version": "2024-02-01",
+        "messages": [{"role": "user", "content": "This is a test"}],
+        "max_tokens": 5,
+        "temperature": 0.7,
+        "timeout": 5,
+        "user": "helicone_latency_test_user",
+        "mock_response": "It's simple to use and easy to get started",
+    }
+    completion_args.update(completion_kwargs)
+    return asyncio.create_task(litellm.acompletion(**completion_args))
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    condition=not os.environ.get("OPENAI_API_KEY", False),
+    reason="Authentication missing for openai",
+)
+async def test_helicone_logging_metadata():
+    import uuid
+
+    litellm.success_callback = ["helicone"]
+
+    request_id = str(uuid.uuid4())
+    trace_common_metadata = {"Helicone-Property-Request-Id": request_id}
+    metadata = copy.deepcopy(trace_common_metadata)
+    metadata["Helicone-Property-Conversation"] = "support_issue"
+    metadata["Helicone-Auth"] = os.getenv("HELICONE_API_KEY")
+    response = await create_async_task(
+        model="gpt-3.5-turbo",
+        mock_response="Hey! how's it going?",
+        messages=[
+            {
+                "role": "user",
+                "content": f"{request_id}",
+            }
+        ],
+        max_tokens=100,
+        temperature=0.2,
+        metadata=copy.deepcopy(metadata),
+    )
+    print(response)
+    time.sleep(3)
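
The metadata test above exercises the same tagging without the proxy: Helicone properties can be passed straight through the completion call's `metadata` argument. A minimal standalone sketch, assuming `HELICONE_API_KEY` is set in the environment; `mock_response` keeps it from hitting a real provider:

import asyncio

import litellm

litellm.success_callback = ["helicone"]

async def main():
    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "ping"}],
        mock_response="pong",  # no real provider call is made
        metadata={"Helicone-Property-Conversation": "support_issue"},
    )
    print(response.choices[0].message.content)

asyncio.run(main())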