From 2c9560239a9d6d4bf3e9bc0e53147fd2f0a8e100 Mon Sep 17 00:00:00 2001
From: maamalama
Date: Fri, 5 Jul 2024 20:17:38 -0700
Subject: [PATCH 1/5] headers and custom gateway

---
 litellm/integrations/helicone.py              | 50 ++++++++++++++++++-
 litellm/litellm_core_utils/litellm_logging.py |  2 +
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py
index 8ea18a7d5..14b649898 100644
--- a/litellm/integrations/helicone.py
+++ b/litellm/integrations/helicone.py
@@ -4,11 +4,12 @@ import dotenv, os
 import requests  # type: ignore
 import litellm
 import traceback
+from litellm._logging import verbose_logger
 
 
 class HeliconeLogger:
     # Class variables or attributes
-    helicone_model_list = ["gpt", "claude"]
+    helicone_model_list = ["gpt", "claude", "command-r", "command-r-plus", "command-light", "command-medium", "command-medium-beta", "command-xlarge-nightly", "command-nightly "]
 
     def __init__(self):
         # Instance variables
@@ -37,15 +38,58 @@ class HeliconeLogger:
         }
 
         return claude_provider_request, claude_response_obj
 
+    @staticmethod
+    def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
+        """
+        Adds metadata from proxy request headers to Helicone logging if keys start
+        with "helicone_", overwriting matching keys already present in
+        litellm_params.metadata.
+
+        For example, if you want to add a custom property to your request, send
+        `headers: { ..., helicone-property-something: 1234 }` via the proxy request.
+        """
+        if litellm_params is None:
+            return metadata
+
+        if litellm_params.get("proxy_server_request") is None:
+            return metadata
+
+        if metadata is None:
+            metadata = {}
+
+        proxy_headers = (
+            litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
+        )
+
+        for metadata_param_key in proxy_headers:
+            if metadata_param_key.startswith("helicone_"):
+                trace_param_key = metadata_param_key.replace("helicone_", "", 1)
+                if trace_param_key in metadata:
+                    verbose_logger.warning(
+                        f"Overwriting Helicone `{trace_param_key}` from request header"
+                    )
+                else:
+                    verbose_logger.debug(
+                        f"Found Helicone `{trace_param_key}` in request header"
+                    )
+                metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
+
+        return metadata
+
     def log_success(
-        self, model, messages, response_obj, start_time, end_time, print_verbose
+        self, model, messages, response_obj, start_time, end_time, print_verbose, kwargs
    ):
         # Method definition
         try:
             print_verbose(
                 f"Helicone Logging - Enters logging function for model {model}"
             )
+            litellm_params = kwargs.get("litellm_params", {})
+            litellm_call_id = kwargs.get("litellm_call_id", None)
+            metadata = (
+                litellm_params.get("metadata", {}) or {}
+            )
+            metadata = self.add_metadata_from_header(litellm_params, metadata)
             model = (
                 model
                 if any(
@@ -73,6 +117,8 @@ class HeliconeLogger:
 
             # Code to be executed
             url = "https://api.hconeai.com/oai/v1/log"
+            if model.startswith("command"):
+                url = "https://api.hconeai.com/custom/v1/log"
             headers = {
                 "Authorization": f"Bearer {self.key}",
                 "Content-Type": "application/json",
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 4edbce5e1..e1ef5a476 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -807,6 +807,7 @@ class Logging:
                     print_verbose("reaches helicone for logging!")
                     model = self.model
                     messages = self.model_call_details["input"]
+                    kwargs = self.model_call_details
                     heliconeLogger.log_success(
                         model=model,
                         messages=messages,
@@ -814,6 +815,7 @@ class Logging:
                         start_time=start_time,
                         end_time=end_time,
                         print_verbose=print_verbose,
+                        kwargs=kwargs,
                     )
                 if callback == "langfuse":
                     global langFuseLogger
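[Illustrative sketch, not part of the patch] PATCH 1/5 copies any proxy request header that starts with "helicone_" into the logged metadata, stripping the prefix via trace_param_key. A minimal way to exercise that path, assuming a LiteLLM proxy on localhost:4000; the URL, key, header name, and value are placeholders, not values from this series:

    import requests

    response = requests.post(
        "http://localhost:4000/v1/chat/completions",  # assumed local proxy address
        headers={
            "Authorization": "Bearer sk-placeholder",  # placeholder proxy key
            # Picked up by add_metadata_from_header; logged under "property-team"
            "helicone_property-team": "backend",
        },
        json={
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "ping"}],
        },
    )
    print(response.status_code)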
From 75d7755d348ff684be40c688a464f41160f9b3ec Mon Sep 17 00:00:00 2001
From: maamalama
Date: Mon, 8 Jul 2024 12:34:35 -0700
Subject: [PATCH 2/5] some fixes

---
 litellm/integrations/helicone.py | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py
index 14b649898..63fa76d52 100644
--- a/litellm/integrations/helicone.py
+++ b/litellm/integrations/helicone.py
@@ -9,7 +9,7 @@ from litellm._logging import verbose_logger
 
 class HeliconeLogger:
     # Class variables or attributes
-    helicone_model_list = ["gpt", "claude", "command-r", "command-r-plus", "command-light", "command-medium", "command-medium-beta", "command-xlarge-nightly", "command-nightly "]
+    helicone_model_list = ["gpt", "claude", "command-r", "command-r-plus", "command-light", "command-medium", "command-medium-beta", "command-xlarge-nightly", "command-nightly"]
 
     def __init__(self):
         # Instance variables
@@ -61,18 +61,9 @@ class HeliconeLogger:
             litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
         )
 
-        for metadata_param_key in proxy_headers:
-            if metadata_param_key.startswith("helicone_"):
-                trace_param_key = metadata_param_key.replace("helicone_", "", 1)
-                if trace_param_key in metadata:
-                    verbose_logger.warning(
-                        f"Overwriting Helicone `{trace_param_key}` from request header"
-                    )
-                else:
-                    verbose_logger.debug(
-                        f"Found Helicone `{trace_param_key}` in request header"
-                    )
-                metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
+        for header_key in proxy_headers:
+            if header_key.startswith("helicone_"):
+                metadata[header_key] = proxy_headers.get(header_key)
 
         return metadata
 
@@ -131,11 +122,13 @@ class HeliconeLogger:
             end_time_milliseconds = int(
                 (end_time.timestamp() - end_time_seconds) * 1000
             )
+            meta = {"Helicone-Auth": f"Bearer {self.key}"}
+            meta.update(metadata)
             data = {
                 "providerRequest": {
                     "url": self.provider_url,
                     "json": provider_request,
-                    "meta": {"Helicone-Auth": f"Bearer {self.key}"},
+                    "meta": meta,
                 },
                 "providerResponse": providerResponse,
                 "timing": {
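[Illustrative sketch, not part of the patch] After PATCH 2/5 the full header key is kept and custom metadata rides alongside the auth entry in providerRequest.meta. A rough sketch of the resulting payload shape, with placeholder values rather than output from a real run:

    meta = {"Helicone-Auth": "Bearer <HELICONE_API_KEY>"}  # placeholder key
    meta.update({"helicone_property-team": "backend"})  # forwarded request metadata

    data = {
        "providerRequest": {
            "url": "https://api.openai.com/v1",  # stands in for self.provider_url
            "json": {"model": "gpt-3.5-turbo", "messages": []},  # provider request body
            "meta": meta,  # auth and custom properties merged by this patch
        },
    }
    print(data["providerRequest"]["meta"])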
From 5715f3b665021e7a770d3d92e287803e386f8ab2 Mon Sep 17 00:00:00 2001
From: maamalama
Date: Mon, 8 Jul 2024 14:04:26 -0700
Subject: [PATCH 3/5] helicone tests

---
 litellm/tests/test_helicone_integration.py | 187 ++++++++++++++++++---
 1 file changed, 164 insertions(+), 23 deletions(-)

diff --git a/litellm/tests/test_helicone_integration.py b/litellm/tests/test_helicone_integration.py
index 034856000..4b92c1bbd 100644
--- a/litellm/tests/test_helicone_integration.py
+++ b/litellm/tests/test_helicone_integration.py
@@ -1,31 +1,172 @@
-# #### What this tests ####
-# # This tests if logging to the helicone integration actually works
+import asyncio
+import copy
+import logging
+import os
+import sys
+from typing import Any
+from unittest.mock import MagicMock, patch
 
-# import sys, os
-# import traceback
-# import pytest
+logging.basicConfig(level=logging.DEBUG)
+sys.path.insert(0, os.path.abspath("../.."))
 
-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path
-# import litellm
-# from litellm import embedding, completion
+import litellm
+from litellm import completion
 
-# litellm.success_callback = ["helicone"]
+litellm.num_retries = 3
+litellm.success_callback = ["helicone"]
+os.environ["HELICONE_DEBUG"] = "True"
 
-# litellm.set_verbose = True
+import pytest
 
-# user_message = "Hello, how are you?"
-# messages = [{"content": user_message, "role": "user"}]
 
+def search_logs(log_file_path, num_good_logs=1):
+    import re
+
+    print("\n searching logs")
+    bad_logs = []
+    good_logs = []
+    all_logs = []
+    try:
+        with open(log_file_path, "r") as log_file:
+            lines = log_file.readlines()
+            print(f"searching logslines: {lines}")
+            for line in lines:
+                all_logs.append(line.strip())
+                if "/v1/request/query" in line:
+                    print("Found log with /v1/request/query:")
+                    print(line.strip())
+                    print("\n\n")
+                    match = re.search(
+                        r'"POST /v1/request/query HTTP/1.1" (\d+) (\d+)',
+                        line,
+                    )
+                    if match:
+                        status_code = int(match.group(1))
+                        print("STATUS CODE", status_code)
+                        if status_code != 200:
+                            print("got a BAD log")
+                            bad_logs.append(line.strip())
+                        else:
+                            good_logs.append(line.strip())
+        print("\nBad Logs")
+        print(bad_logs)
+        if len(bad_logs) > 0:
+            raise Exception(f"bad logs, Bad logs = {bad_logs}")
+        assert (
+            len(good_logs) == num_good_logs
+        ), f"Did not get expected number of good logs, expected {num_good_logs}, got {len(good_logs)}. All logs \n {all_logs}"
+        print("\nGood Logs")
+        print(good_logs)
+        if len(good_logs) <= 0:
+            raise Exception(
+                f"There were no Good Logs from Helicone. No logs with /v1/request/query status 200. \nAll logs:{all_logs}"
+            )
+
+    except Exception as e:
+        raise e
 
-# # openai call
-# response = completion(
-#     model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
-# )
 
+def pre_helicone_setup():
+    """
+    Set up the logging for the 'pre_helicone_setup' function.
+    """
+    import logging
 
-# # cohere call
-# response = completion(
-#     model="command-nightly",
-#     messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
-# )
+    logging.basicConfig(filename="helicone.log", level=logging.DEBUG)
+    logger = logging.getLogger()
+
+    file_handler = logging.FileHandler("helicone.log", mode="w")
+    file_handler.setLevel(logging.DEBUG)
+    logger.addHandler(file_handler)
+    return
+
+
+def test_helicone_logging_async():
+    try:
+        pre_helicone_setup()
+        litellm.set_verbose = True
+
+        litellm.success_callback = []
+        start_time_empty_callback = asyncio.run(make_async_calls())
+        print("done with no callback test")
+
+        print("starting helicone test")
+        litellm.success_callback = ["helicone"]
+        start_time_helicone = asyncio.run(make_async_calls())
+        print("done with helicone test")
+
+        print(f"Time taken with success_callback='helicone': {start_time_helicone}")
+        print(f"Time taken with empty success_callback: {start_time_empty_callback}")
+
+        assert abs(start_time_helicone - start_time_empty_callback) < 1
+
+    except litellm.Timeout as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+
+
+async def make_async_calls(metadata=None, **completion_kwargs):
+    tasks = []
+    for _ in range(5):
+        tasks.append(create_async_task())
+
+    start_time = asyncio.get_event_loop().time()
+
+    responses = await asyncio.gather(*tasks)
+
+    for idx, response in enumerate(responses):
+        print(f"Response from Task {idx + 1}: {response}")
+
+    total_time = asyncio.get_event_loop().time() - start_time
+
+    return total_time
+
+
+def create_async_task(**completion_kwargs):
+    completion_args = {
+        "model": "azure/chatgpt-v-2",
+        "api_version": "2024-02-01",
+        "messages": [{"role": "user", "content": "This is a test"}],
+        "max_tokens": 5,
+        "temperature": 0.7,
+        "timeout": 5,
+        "user": "helicone_latency_test_user",
+        "mock_response": "It's simple to use and easy to get started",
+    }
+    completion_args.update(completion_kwargs)
+    return asyncio.create_task(litellm.acompletion(**completion_args))
+
+@pytest.mark.asyncio
+async def test_helicone_logging_metadata():
+    import uuid
+
+    litellm.set_verbose = True
+    litellm.success_callback = ["helicone"]
+
+    run_id = str(uuid.uuid4())
+    request_id = f"litellm-test-session-{run_id}"
+    trace_common_metadata = {
+        "Helicone-Property-Request-Id": request_id
+    }
+    for request_num in range(1, 3):
+        metadata = copy.deepcopy(trace_common_metadata)
+        metadata["Helicone-Property-Conversation"] = "support_issue"
+        response = await create_async_task(
+            model="gpt-3.5-turbo",
+            mock_response=f"{request_id}",
+            messages=[
+                {
+                    "role": "user",
+                    "content": f"{request_id}",
+                }
+            ],
+            max_tokens=100,
+            temperature=0.2,
+            metadata=copy.deepcopy(metadata),
+        )
+        print(response)
+
+        await asyncio.sleep(2)
+
+    # Check log file for entries
+    search_logs("helicone.log")
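[Illustrative sketch, not part of the patch] The search_logs helper added above keys off access-log-style lines. A self-contained sanity check of its regex against a made-up sample line (not captured from a real Helicone run):

    import re

    sample = '127.0.0.1 - "POST /v1/request/query HTTP/1.1" 200 156'
    match = re.search(r'"POST /v1/request/query HTTP/1.1" (\d+) (\d+)', sample)
    assert match is not None
    assert int(match.group(1)) == 200  # group 1 is the HTTP status code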
From 3367c78113e1511b3097891048ecf3c63a78c625 Mon Sep 17 00:00:00 2001
From: maamalama
Date: Mon, 8 Jul 2024 14:37:47 -0700
Subject: [PATCH 4/5] fix

---
 litellm/integrations/helicone.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py
index 63fa76d52..f0666431a 100644
--- a/litellm/integrations/helicone.py
+++ b/litellm/integrations/helicone.py
@@ -108,8 +108,6 @@ class HeliconeLogger:
 
             # Code to be executed
             url = "https://api.hconeai.com/oai/v1/log"
-            if model.startswith("command"):
-                url = "https://api.hconeai.com/custom/v1/log"
             headers = {
                 "Authorization": f"Bearer {self.key}",
                 "Content-Type": "application/json",
From 6e7f2ecf49300b77bedd3d1545d30647852319d1 Mon Sep 17 00:00:00 2001
From: maamalama
Date: Mon, 8 Jul 2024 21:17:35 -0700
Subject: [PATCH 5/5] test fixes

---
 litellm/tests/test_helicone_integration.py | 100 ++++++---------------
 1 file changed, 26 insertions(+), 74 deletions(-)

diff --git a/litellm/tests/test_helicone_integration.py b/litellm/tests/test_helicone_integration.py
index 4b92c1bbd..8e1f0a94e 100644
--- a/litellm/tests/test_helicone_integration.py
+++ b/litellm/tests/test_helicone_integration.py
@@ -3,6 +3,7 @@ import copy
 import logging
 import os
 import sys
+import time
 from typing import Any
 from unittest.mock import MagicMock, patch
 
@@ -15,56 +16,10 @@ from litellm import completion
 litellm.num_retries = 3
 litellm.success_callback = ["helicone"]
 os.environ["HELICONE_DEBUG"] = "True"
+os.environ['LITELLM_LOG'] = 'DEBUG'
 
 import pytest
 
-def search_logs(log_file_path, num_good_logs=1):
-    import re
-
-    print("\n searching logs")
-    bad_logs = []
-    good_logs = []
-    all_logs = []
-    try:
-        with open(log_file_path, "r") as log_file:
-            lines = log_file.readlines()
-            print(f"searching logslines: {lines}")
-            for line in lines:
-                all_logs.append(line.strip())
-                if "/v1/request/query" in line:
-                    print("Found log with /v1/request/query:")
-                    print(line.strip())
-                    print("\n\n")
-                    match = re.search(
-                        r'"POST /v1/request/query HTTP/1.1" (\d+) (\d+)',
-                        line,
-                    )
-                    if match:
-                        status_code = int(match.group(1))
-                        print("STATUS CODE", status_code)
-                        if status_code != 200:
-                            print("got a BAD log")
-                            bad_logs.append(line.strip())
-                        else:
-                            good_logs.append(line.strip())
-        print("\nBad Logs")
-        print(bad_logs)
-        if len(bad_logs) > 0:
-            raise Exception(f"bad logs, Bad logs = {bad_logs}")
-        assert (
-            len(good_logs) == num_good_logs
-        ), f"Did not get expected number of good logs, expected {num_good_logs}, got {len(good_logs)}. All logs \n {all_logs}"
-        print("\nGood Logs")
-        print(good_logs)
-        if len(good_logs) <= 0:
-            raise Exception(
-                f"There were no Good Logs from Helicone. No logs with /v1/request/query status 200. \nAll logs:{all_logs}"
-            )
-
-    except Exception as e:
-        raise e
-
-
 def pre_helicone_setup():
     """
     Set up the logging for the 'pre_helicone_setup' function.
@@ -83,8 +38,6 @@ def pre_helicone_setup():
 def test_helicone_logging_async():
     try:
         pre_helicone_setup()
-        litellm.set_verbose = True
-
         litellm.success_callback = []
         start_time_empty_callback = asyncio.run(make_async_calls())
         print("done with no callback test")
@@ -137,36 +90,35 @@ def create_async_task(**completion_kwargs):
     return asyncio.create_task(litellm.acompletion(**completion_args))
 
 @pytest.mark.asyncio
+@pytest.mark.skipif(
+    condition=not os.environ.get("OPENAI_API_KEY", False),
+    reason="Authentication missing for openai",
+)
 async def test_helicone_logging_metadata():
     import uuid
-
-    litellm.set_verbose = True
     litellm.success_callback = ["helicone"]
-
-    run_id = str(uuid.uuid4())
-    request_id = f"litellm-test-session-{run_id}"
+    request_id = str(uuid.uuid4())
     trace_common_metadata = {
         "Helicone-Property-Request-Id": request_id
     }
-    for request_num in range(1, 3):
-        metadata = copy.deepcopy(trace_common_metadata)
-        metadata["Helicone-Property-Conversation"] = "support_issue"
-        response = await create_async_task(
-            model="gpt-3.5-turbo",
-            mock_response=f"{request_id}",
-            messages=[
-                {
-                    "role": "user",
-                    "content": f"{request_id}",
-                }
-            ],
-            max_tokens=100,
-            temperature=0.2,
-            metadata=copy.deepcopy(metadata),
-        )
-        print(response)
+
+    metadata = copy.deepcopy(trace_common_metadata)
+    metadata["Helicone-Property-Conversation"] = "support_issue"
+    metadata["Helicone-Auth"] = os.getenv("HELICONE_API_KEY")
+    response = await create_async_task(
+        model="gpt-3.5-turbo",
+        mock_response="Hey! how's it going?",
+        messages=[
+            {
+                "role": "user",
+                "content": f"{request_id}",
+            }
+        ],
+        max_tokens=100,
+        temperature=0.2,
+        metadata=copy.deepcopy(metadata),
+    )
+    print(response)
 
-        await asyncio.sleep(2)
-
-    # Check log file for entries
-    search_logs("helicone.log")
+    time.sleep(3)
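[Illustrative sketch, not part of the series] The end-user pattern these tests exercise, with placeholder values; assumes HELICONE_API_KEY is set in the environment, and uses mock_response to avoid a real provider call just as the tests do:

    import litellm

    litellm.success_callback = ["helicone"]

    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello"}],
        metadata={
            # Forwarded to Helicone as custom properties by the logger
            "Helicone-Property-Conversation": "support_issue",
            "Helicone-Property-Request-Id": "example-request-id",
        },
        mock_response="Hi there!",  # skip the real provider call
    )
    print(response.choices[0].message.content)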