Merge pull request #4607 from maamalama/helicone-cohere

Helicone Headers & Cohere support
Krish Dholakia, 2024-07-11 22:01:44 -07:00, committed by GitHub
commit 5ad341d0ff
3 changed files with 158 additions and 26 deletions


@@ -4,11 +4,12 @@ import dotenv, os
 import requests  # type: ignore
 import litellm
 import traceback
+from litellm._logging import verbose_logger
 
 
 class HeliconeLogger:
     # Class variables or attributes
-    helicone_model_list = ["gpt", "claude"]
+    helicone_model_list = ["gpt", "claude", "command-r", "command-r-plus", "command-light", "command-medium", "command-medium-beta", "command-xlarge-nightly", "command-nightly"]
 
     def __init__(self):
         # Instance variables
@@ -38,14 +39,48 @@ class HeliconeLogger:
         return claude_provider_request, claude_response_obj
 
+    @staticmethod
+    def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
+        """
+        Adds metadata from proxy request headers to Helicone logging if keys start
+        with "helicone_", overwriting any matching keys already present in
+        litellm_params.metadata.
+
+        For example, to attach a custom property to your request, send
+        `headers: { ..., helicone_property_something: 1234 }` via the proxy request.
+        """
+        if litellm_params is None:
+            return metadata
+
+        if litellm_params.get("proxy_server_request") is None:
+            return metadata
+
+        if metadata is None:
+            metadata = {}
+
+        proxy_headers = (
+            litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
+        )
+
+        for header_key in proxy_headers:
+            if header_key.startswith("helicone_"):
+                metadata[header_key] = proxy_headers.get(header_key)
+
+        return metadata
+
     def log_success(
-        self, model, messages, response_obj, start_time, end_time, print_verbose
+        self, model, messages, response_obj, start_time, end_time, print_verbose, kwargs
     ):
         # Method definition
         try:
             print_verbose(
                 f"Helicone Logging - Enters logging function for model {model}"
             )
+            litellm_params = kwargs.get("litellm_params", {})
+            litellm_call_id = kwargs.get("litellm_call_id", None)
+            metadata = (
+                litellm_params.get("metadata", {}) or {}
+            )
+            metadata = self.add_metadata_from_header(litellm_params, metadata)
             model = (
                 model
                 if any(
@@ -85,11 +120,13 @@ class HeliconeLogger:
             end_time_milliseconds = int(
                 (end_time.timestamp() - end_time_seconds) * 1000
             )
+            meta = {"Helicone-Auth": f"Bearer {self.key}"}
+            meta.update(metadata)
             data = {
                 "providerRequest": {
                     "url": self.provider_url,
                     "json": provider_request,
-                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
+                    "meta": meta,
                 },
                 "providerResponse": providerResponse,
                 "timing": {


@@ -811,6 +811,7 @@ class Logging:
                     print_verbose("reaches helicone for logging!")
                     model = self.model
                     messages = self.model_call_details["input"]
+                    kwargs = self.model_call_details
                     heliconeLogger.log_success(
                         model=model,
                         messages=messages,
@@ -818,6 +819,7 @@ class Logging:
                         start_time=start_time,
                         end_time=end_time,
                         print_verbose=print_verbose,
+                        kwargs=kwargs,
                     )
                 if callback == "langfuse":
                     global langFuseLogger
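
These two hunks pass the full model_call_details dict into log_success as kwargs, which is what lets caller-supplied metadata reach Helicone. A hedged end-to-end sketch of the behavior this enables (litellm.completion and its metadata parameter are standard litellm API; the Cohere model name comes from the expanded helicone_model_list above):

    import litellm

    litellm.success_callback = ["helicone"]  # needs HELICONE_API_KEY in the env

    response = litellm.completion(
        model="command-r",  # now matched by helicone_model_list; needs COHERE_API_KEY
        messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
        metadata={"Helicone-Property-Conversation": "support_issue"},
    )
    # success_handler passes kwargs=self.model_call_details to log_success, so the
    # metadata above lands in providerRequest.meta next to the Helicone-Auth header.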


@@ -1,31 +1,124 @@
 # #### What this tests ####
 # # This tests if logging to the helicone integration actually works
+import asyncio
+import copy
+import logging
+import os
+import sys
+import time
+from typing import Any
+from unittest.mock import MagicMock, patch
-# import sys, os
-# import traceback
-# import pytest
+
+logging.basicConfig(level=logging.DEBUG)
+sys.path.insert(0, os.path.abspath("../.."))
-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path
-# import litellm
-# from litellm import embedding, completion
+
+import litellm
+from litellm import completion
-# litellm.success_callback = ["helicone"]
+
+litellm.num_retries = 3
+litellm.success_callback = ["helicone"]
+os.environ["HELICONE_DEBUG"] = "True"
+os.environ["LITELLM_LOG"] = "DEBUG"
-# litellm.set_verbose = True
+
+import pytest
-# user_message = "Hello, how are you?"
-# messages = [{"content": user_message, "role": "user"}]
+
+
+def pre_helicone_setup():
+    """
+    Set up file logging for the Helicone tests.
+    """
+    import logging
+
+    logging.basicConfig(filename="helicone.log", level=logging.DEBUG)
+    logger = logging.getLogger()
+
+    file_handler = logging.FileHandler("helicone.log", mode="w")
+    file_handler.setLevel(logging.DEBUG)
+
+    logger.addHandler(file_handler)
+    return
-# # openai call
-# response = completion(
-#     model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
-# )
+
+
+def test_helicone_logging_async():
+    try:
+        pre_helicone_setup()
+        litellm.success_callback = []
+        start_time_empty_callback = asyncio.run(make_async_calls())
+        print("done with no callback test")
-# # cohere call
-# response = completion(
-#     model="command-nightly",
-#     messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
-# )
+
+        print("starting helicone test")
+        litellm.success_callback = ["helicone"]
+        start_time_helicone = asyncio.run(make_async_calls())
+        print("done with helicone test")
+
+        print(f"Time taken with success_callback='helicone': {start_time_helicone}")
+        print(f"Time taken with empty success_callback: {start_time_empty_callback}")
+
+        assert abs(start_time_helicone - start_time_empty_callback) < 1
+
+    except litellm.Timeout as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+
+
+async def make_async_calls(metadata=None, **completion_kwargs):
+    tasks = []
+    for _ in range(5):
+        tasks.append(create_async_task())
+
+    start_time = asyncio.get_event_loop().time()
+
+    responses = await asyncio.gather(*tasks)
+
+    for idx, response in enumerate(responses):
+        print(f"Response from Task {idx + 1}: {response}")
+
+    total_time = asyncio.get_event_loop().time() - start_time
+
+    return total_time
+
+
+def create_async_task(**completion_kwargs):
+    completion_args = {
+        "model": "azure/chatgpt-v-2",
+        "api_version": "2024-02-01",
+        "messages": [{"role": "user", "content": "This is a test"}],
+        "max_tokens": 5,
+        "temperature": 0.7,
+        "timeout": 5,
+        "user": "helicone_latency_test_user",
+        "mock_response": "It's simple to use and easy to get started",
+    }
+    completion_args.update(completion_kwargs)
+    return asyncio.create_task(litellm.acompletion(**completion_args))
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    condition=not os.environ.get("OPENAI_API_KEY", False),
+    reason="Authentication missing for openai",
+)
+async def test_helicone_logging_metadata():
+    import uuid
+
+    litellm.success_callback = ["helicone"]
+
+    request_id = str(uuid.uuid4())
+    trace_common_metadata = {"Helicone-Property-Request-Id": request_id}
+
+    metadata = copy.deepcopy(trace_common_metadata)
+    metadata["Helicone-Property-Conversation"] = "support_issue"
+    metadata["Helicone-Auth"] = os.getenv("HELICONE_API_KEY")
+    response = await create_async_task(
+        model="gpt-3.5-turbo",
+        mock_response="Hey! how's it going?",
+        messages=[
+            {
+                "role": "user",
+                "content": f"{request_id}",
+            }
+        ],
+        max_tokens=100,
+        temperature=0.2,
+        metadata=copy.deepcopy(metadata),
+    )
+    print(response)
+    time.sleep(3)
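
The latency test above asserts that enabling the Helicone callback adds less than one second across five mocked calls (mock_response short-circuits the provider request), while the metadata test is gated on OPENAI_API_KEY. A sketch for running the file locally, assuming the module name below (the commit view does not show file paths):

    import pytest

    # -s streams print output, -x stops on first failure; the file name is an
    # assumption based on what this diff tests.
    pytest.main(["-s", "-x", "test_helicone_integration.py"])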