Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

Merge pull request #4607 from maamalama/helicone-cohere: Helicone Headers & Cohere support

Commit 5ad341d0ff: 3 changed files with 158 additions and 26 deletions
File 1 of 3:

@@ -4,11 +4,12 @@ import dotenv, os
 import requests  # type: ignore

 import litellm
 import traceback
+from litellm._logging import verbose_logger


 class HeliconeLogger:
     # Class variables or attributes
-    helicone_model_list = ["gpt", "claude"]
+    helicone_model_list = ["gpt", "claude", "command-r", "command-r-plus", "command-light", "command-medium", "command-medium-beta", "command-xlarge-nightly", "command-nightly"]

     def __init__(self):
         # Instance variables
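The expanded helicone_model_list above means Cohere's command-family models are now matched by the Helicone logger. As a rough illustration (not part of this diff; the model choice and environment variables are assumptions), enabling the callback and sending one Cohere call through litellm would look like:

# Illustrative sketch, not from this PR: log a Cohere call to Helicone.
# Assumes HELICONE_API_KEY and COHERE_API_KEY are set in the environment.
import litellm

litellm.success_callback = ["helicone"]

response = litellm.completion(
    model="command-r",  # now matched by the expanded helicone_model_list
    messages=[{"role": "user", "content": "Hi, testing Helicone logging"}],
)
print(response)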
@@ -37,15 +38,49 @@ class HeliconeLogger:
         }

         return claude_provider_request, claude_response_obj

+    @staticmethod
+    def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
+        """
+        Adds metadata from proxy request headers to Helicone logging if keys start with "helicone_"
+        and overwrites litellm_params.metadata if already included.
+
+        For example if you want to add custom property to your request, send
+        `headers: { ..., helicone-property-something: 1234 }` via proxy request.
+        """
+        if litellm_params is None:
+            return metadata
+
+        if litellm_params.get("proxy_server_request") is None:
+            return metadata
+
+        if metadata is None:
+            metadata = {}
+
+        proxy_headers = (
+            litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
+        )
+
+        for header_key in proxy_headers:
+            if header_key.startswith("helicone_"):
+                metadata[header_key] = proxy_headers.get(header_key)
+
+        return metadata
+
     def log_success(
-        self, model, messages, response_obj, start_time, end_time, print_verbose
+        self, model, messages, response_obj, start_time, end_time, print_verbose, kwargs
     ):
         # Method definition
         try:
             print_verbose(
                 f"Helicone Logging - Enters logging function for model {model}"
             )
+            litellm_params = kwargs.get("litellm_params", {})
+            litellm_call_id = kwargs.get("litellm_call_id", None)
+            metadata = (
+                litellm_params.get("metadata", {}) or {}
+            )
+            metadata = self.add_metadata_from_header(litellm_params, metadata)
             model = (
                 model
                 if any(
@@ -85,11 +120,13 @@ class HeliconeLogger:
             end_time_milliseconds = int(
                 (end_time.timestamp() - end_time_seconds) * 1000
             )
+            meta = {"Helicone-Auth": f"Bearer {self.key}"}
+            meta.update(metadata)
             data = {
                 "providerRequest": {
                     "url": self.provider_url,
                     "json": provider_request,
-                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
+                    "meta": meta,
                 },
                 "providerResponse": providerResponse,
                 "timing": {
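With add_metadata_from_header in place, any header whose key starts with "helicone_" on a proxied request is copied into the metadata dict, and meta.update(metadata) merges it into the meta block sent to Helicone alongside Helicone-Auth. A hedged client-side sketch follows; the proxy URL, key, and header name are placeholders, not from this PR:

# Hypothetical call through a litellm proxy; URL and key are placeholders.
import requests

resp = requests.post(
    "http://localhost:4000/chat/completions",
    headers={
        "Authorization": "Bearer sk-1234",        # placeholder proxy key
        "helicone_property_session": "demo-123",  # picked up by add_metadata_from_header
    },
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hello"}],
    },
)
print(resp.json())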
File 2 of 3:

@@ -811,6 +811,7 @@ class Logging:
                    print_verbose("reaches helicone for logging!")
                    model = self.model
                    messages = self.model_call_details["input"]
+                    kwargs = self.model_call_details
                    heliconeLogger.log_success(
                        model=model,
                        messages=messages,
@@ -818,6 +819,7 @@ class Logging:
                        start_time=start_time,
                        end_time=end_time,
                        print_verbose=print_verbose,
+                        kwargs=kwargs,
                    )
                if callback == "langfuse":
                    global langFuseLogger
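For context, the kwargs forwarded here is the full model_call_details dict, which is how log_success gets at litellm_params (and, through it, proxy headers and metadata). A rough sketch of the shape the Helicone logger reads; the values are placeholders:

# Shape assumed by HeliconeLogger.log_success (placeholder values):
kwargs = {
    "litellm_call_id": "4f3c-...",  # unique id for this call
    "litellm_params": {
        "metadata": {"Helicone-Property-Request-Id": "..."},
        "proxy_server_request": {
            "headers": {"helicone_property_session": "demo-123"},
        },
    },
    # ...plus the rest of model_call_details
}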
File 3 of 3:

@@ -1,31 +1,124 @@
-# #### What this tests ####
-# # This tests if logging to the helicone integration actually works
+import asyncio
+import copy
+import logging
+import os
+import sys
+import time
+from typing import Any
+from unittest.mock import MagicMock, patch

-# import sys, os
-# import traceback
-# import pytest
+logging.basicConfig(level=logging.DEBUG)
+sys.path.insert(0, os.path.abspath("../.."))

-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path
-# import litellm
-# from litellm import embedding, completion
+import litellm
+from litellm import completion

-# litellm.success_callback = ["helicone"]
+litellm.num_retries = 3
+litellm.success_callback = ["helicone"]
+os.environ["HELICONE_DEBUG"] = "True"
+os.environ['LITELLM_LOG'] = 'DEBUG'

-# litellm.set_verbose = True
+import pytest

-# user_message = "Hello, how are you?"
-# messages = [{"content": user_message, "role": "user"}]
+def pre_helicone_setup():
+    """
+    Set up the logging for the 'pre_helicone_setup' function.
+    """
+    import logging
+
+    logging.basicConfig(filename="helicone.log", level=logging.DEBUG)
+    logger = logging.getLogger()
+
+    file_handler = logging.FileHandler("helicone.log", mode="w")
+    file_handler.setLevel(logging.DEBUG)
+    logger.addHandler(file_handler)
+    return


-# # openai call
-# response = completion(
-#     model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
-# )
+def test_helicone_logging_async():
+    try:
+        pre_helicone_setup()
+        litellm.success_callback = []
+        start_time_empty_callback = asyncio.run(make_async_calls())
+        print("done with no callback test")

-# # cohere call
-# response = completion(
-#     model="command-nightly",
-#     messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
-# )
+        print("starting helicone test")
+        litellm.success_callback = ["helicone"]
+        start_time_helicone = asyncio.run(make_async_calls())
+        print("done with helicone test")
+
+        print(f"Time taken with success_callback='helicone': {start_time_helicone}")
+        print(f"Time taken with empty success_callback: {start_time_empty_callback}")
+
+        assert abs(start_time_helicone - start_time_empty_callback) < 1
+
+    except litellm.Timeout as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+
+
+async def make_async_calls(metadata=None, **completion_kwargs):
+    tasks = []
+    for _ in range(5):
+        tasks.append(create_async_task())
+
+    start_time = asyncio.get_event_loop().time()
+
+    responses = await asyncio.gather(*tasks)
+
+    for idx, response in enumerate(responses):
+        print(f"Response from Task {idx + 1}: {response}")
+
+    total_time = asyncio.get_event_loop().time() - start_time
+
+    return total_time
+
+
+def create_async_task(**completion_kwargs):
+    completion_args = {
+        "model": "azure/chatgpt-v-2",
+        "api_version": "2024-02-01",
+        "messages": [{"role": "user", "content": "This is a test"}],
+        "max_tokens": 5,
+        "temperature": 0.7,
+        "timeout": 5,
+        "user": "helicone_latency_test_user",
+        "mock_response": "It's simple to use and easy to get started",
+    }
+    completion_args.update(completion_kwargs)
+    return asyncio.create_task(litellm.acompletion(**completion_args))
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    condition=not os.environ.get("OPENAI_API_KEY", False),
+    reason="Authentication missing for openai",
+)
+async def test_helicone_logging_metadata():
+    import uuid
+
+    litellm.success_callback = ["helicone"]
+
+    request_id = str(uuid.uuid4())
+    trace_common_metadata = {
+        "Helicone-Property-Request-Id": request_id
+    }
+
+    metadata = copy.deepcopy(trace_common_metadata)
+    metadata["Helicone-Property-Conversation"] = "support_issue"
+    metadata["Helicone-Auth"] = os.getenv("HELICONE_API_KEY")
+    response = await create_async_task(
+        model="gpt-3.5-turbo",
+        mock_response="Hey! how's it going?",
+        messages=[
+            {
+                "role": "user",
+                "content": f"{request_id}",
+            }
+        ],
+        max_tokens=100,
+        temperature=0.2,
+        metadata=copy.deepcopy(metadata),
+    )
+    print(response)
+
+    time.sleep(3)