From 031e0eabf8299d9dc7b40a1e9781e149cfe11dea Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 26 Feb 2024 14:18:56 -0800 Subject: [PATCH 1/4] (v0) start clickhouse --- docker-compose.yml | 2 ++ litellm/integrations/clickhouse.py | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 814677735..a42e3a873 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,6 +8,8 @@ services: - "4000:4000" environment: - AZURE_API_KEY=sk-123 + clickhouse: + image: clickhouse/clickhouse-server diff --git a/litellm/integrations/clickhouse.py b/litellm/integrations/clickhouse.py index fde831a36..c4d5f6b08 100644 --- a/litellm/integrations/clickhouse.py +++ b/litellm/integrations/clickhouse.py @@ -27,6 +27,31 @@ import litellm, uuid from litellm._logging import print_verbose, verbose_logger +def _start_clickhouse(): + import clickhouse_connect + + port = os.getenv("CLICKHOUSE_PORT") + clickhouse_host = os.getenv("CLICKHOUSE_HOST") + if clickhouse_host is not None: + print("setting up clickhouse") + if port is not None and isinstance(port, str): + port = int(port) + + client = clickhouse_connect.get_client( + host=os.getenv("CLICKHOUSE_HOST"), + port=port, + username=os.getenv("CLICKHOUSE_USERNAME"), + password=os.getenv("CLICKHOUSE_PASSWORD"), + ) + + response = client.command( + "CREATE TABLE new_table (key UInt32, value String, metric Float64) ENGINE MergeTree ORDER BY key" + ) + + +_start_clickhouse() + + class ClickhouseLogger: # Class variables or attributes def __init__(self, endpoint=None, headers=None): From 4c0f4d63c3114957e74825e92d3032883511cc12 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 26 Feb 2024 15:25:59 -0800 Subject: [PATCH 2/4] (feat) set up click house table on start --- litellm/integrations/clickhouse.py | 61 ++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/litellm/integrations/clickhouse.py b/litellm/integrations/clickhouse.py index c4d5f6b08..8e133544b 100644 --- a/litellm/integrations/clickhouse.py +++ b/litellm/integrations/clickhouse.py @@ -33,7 +33,7 @@ def _start_clickhouse(): port = os.getenv("CLICKHOUSE_PORT") clickhouse_host = os.getenv("CLICKHOUSE_HOST") if clickhouse_host is not None: - print("setting up clickhouse") + print_verbose("setting up clickhouse") if port is not None and isinstance(port, str): port = int(port) @@ -43,13 +43,46 @@ def _start_clickhouse(): username=os.getenv("CLICKHOUSE_USERNAME"), password=os.getenv("CLICKHOUSE_PASSWORD"), ) - - response = client.command( - "CREATE TABLE new_table (key UInt32, value String, metric Float64) ENGINE MergeTree ORDER BY key" + # view all tables in DB + response = client.query("SHOW TABLES") + print_verbose( + f"checking if litellm spend logs exists, all tables={response.result_rows}" ) + # all tables is returned like this: all tables = [('new_table',), ('spend_logs',)] + # check if spend_logs in all tables + table_names = [all_tables[0] for all_tables in response.result_rows] + if "spend_logs" not in table_names: + print("Clickhouse: spend logs table does not exist... creating it") -_start_clickhouse() + response = client.command( + """ + CREATE TABLE default.spend_logs + ( + `request_id` String, + `call_type` String, + `api_key` String, + `spend` Float64, + `total_tokens` Int256, + `prompt_tokens` Int256, + `completion_tokens` Int256, + `startTime` DateTime, + `endTime` DateTime, + `model` String, + `user` String, + `metadata` String, + `cache_hit` String, + `cache_key` String, + `request_tags` String + ) + ENGINE = MergeTree + ORDER BY tuple(); + """ + ) + else: + # check if spend logs exist, if it does then return the schema + response = client.query("DESCRIBE default.spend_logs") + print_verbose(f"spend logs schema ={response.result_rows}") class ClickhouseLogger: @@ -57,6 +90,8 @@ class ClickhouseLogger: def __init__(self, endpoint=None, headers=None): import clickhouse_connect + _start_clickhouse() + print_verbose( f"ClickhouseLogger init, host {os.getenv('CLICKHOUSE_HOST')}, port {os.getenv('CLICKHOUSE_PORT')}, username {os.getenv('CLICKHOUSE_USERNAME')}" ) @@ -107,15 +142,6 @@ class ClickhouseLogger: ) # Build the initial payload - # Ensure everything in the payload is converted to str - # for key, value in payload.items(): - # try: - # print("key=", key, "type=", type(value)) - # # payload[key] = str(value) - # except: - # # non blocking if it can't cast to a str - # pass - print_verbose(f"\nClickhouse Logger - Logging payload = {payload}") # just get the payload items in one array and payload keys in 2nd array @@ -126,15 +152,10 @@ class ClickhouseLogger: values.append(value) data = [values] - # print("logging data=", data) - # print("logging keys=", keys) - response = self.client.insert("spend_logs", data, column_names=keys) # make request to endpoint with payload - print_verbose( - f"Clickhouse Logger - final response status = {response_status}, response text = {response_text}" - ) + print_verbose(f"Clickhouse Logger - final response = {response}") except Exception as e: traceback.print_exc() verbose_logger.debug(f"Clickhouse - {str(e)}\n{traceback.format_exc()}") From 919530593b99649064a4e5a593f0517c9b277198 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 26 Feb 2024 15:28:57 -0800 Subject: [PATCH 3/4] (feat) basic clickhouse logging setup --- litellm/integrations/clickhouse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/integrations/clickhouse.py b/litellm/integrations/clickhouse.py index 8e133544b..8c363a99c 100644 --- a/litellm/integrations/clickhouse.py +++ b/litellm/integrations/clickhouse.py @@ -152,7 +152,7 @@ class ClickhouseLogger: values.append(value) data = [values] - response = self.client.insert("spend_logs", data, column_names=keys) + response = self.client.insert("default.spend_logs", data, column_names=keys) # make request to endpoint with payload print_verbose(f"Clickhouse Logger - final response = {response}") From 248755a1a5ed55e1103589c166ff2512a8553f05 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 26 Feb 2024 15:38:07 -0800 Subject: [PATCH 4/4] (feat) init clickhouse logger --- litellm/integrations/clickhouse.py | 36 +++++++++++-------------- litellm/tests/test_clickhouse_logger.py | 3 +++ 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/litellm/integrations/clickhouse.py b/litellm/integrations/clickhouse.py index 8c363a99c..bf5223056 100644 --- a/litellm/integrations/clickhouse.py +++ b/litellm/integrations/clickhouse.py @@ -33,7 +33,7 @@ def _start_clickhouse(): port = os.getenv("CLICKHOUSE_PORT") clickhouse_host = os.getenv("CLICKHOUSE_HOST") if clickhouse_host is not None: - print_verbose("setting up clickhouse") + verbose_logger.debug("setting up clickhouse") if port is not None and isinstance(port, str): port = int(port) @@ -45,7 +45,7 @@ def _start_clickhouse(): ) # view all tables in DB response = client.query("SHOW TABLES") - print_verbose( + verbose_logger.debug( f"checking if litellm spend logs exists, all tables={response.result_rows}" ) # all tables is returned like this: all tables = [('new_table',), ('spend_logs',)] @@ -53,7 +53,9 @@ def _start_clickhouse(): table_names = [all_tables[0] for all_tables in response.result_rows] if "spend_logs" not in table_names: - print("Clickhouse: spend logs table does not exist... creating it") + verbose_logger.debug( + "Clickhouse: spend logs table does not exist... creating it" + ) response = client.command( """ @@ -82,7 +84,13 @@ def _start_clickhouse(): else: # check if spend logs exist, if it does then return the schema response = client.query("DESCRIBE default.spend_logs") - print_verbose(f"spend logs schema ={response.result_rows}") + verbose_logger.debug(f"spend logs schema ={response.result_rows}") + # get all logs from spend logs + response = client.query("SELECT * FROM default.spend_logs") + verbose_logger.debug(f"spend logs ={response.result_rows}") + # get size of spend logs + response = client.query("SELECT count(*) FROM default.spend_logs") + verbose_logger.debug(f"spend logs count ={response.result_rows}") class ClickhouseLogger: @@ -92,7 +100,7 @@ class ClickhouseLogger: _start_clickhouse() - print_verbose( + verbose_logger.debug( f"ClickhouseLogger init, host {os.getenv('CLICKHOUSE_HOST')}, port {os.getenv('CLICKHOUSE_PORT')}, username {os.getenv('CLICKHOUSE_USERNAME')}" ) @@ -117,21 +125,7 @@ class ClickhouseLogger: verbose_logger.debug( f"ClickhouseLogger Logging - Enters logging function for model {kwargs}" ) - - # construct payload to send custom logger # follows the same params as langfuse.py - litellm_params = kwargs.get("litellm_params", {}) - metadata = ( - litellm_params.get("metadata", {}) or {} - ) # if litellm_params['metadata'] == None - messages = kwargs.get("messages") - cost = kwargs.get("response_cost", 0.0) - optional_params = kwargs.get("optional_params", {}) - call_type = kwargs.get("call_type", "litellm.completion") - cache_hit = kwargs.get("cache_hit", False) - usage = response_obj["usage"] - id = response_obj.get("id", str(uuid.uuid4())) - from litellm.proxy.utils import get_logging_payload payload = get_logging_payload( @@ -142,7 +136,7 @@ class ClickhouseLogger: ) # Build the initial payload - print_verbose(f"\nClickhouse Logger - Logging payload = {payload}") + verbose_logger.debug(f"\nClickhouse Logger - Logging payload = {payload}") # just get the payload items in one array and payload keys in 2nd array values = [] @@ -155,7 +149,7 @@ class ClickhouseLogger: response = self.client.insert("default.spend_logs", data, column_names=keys) # make request to endpoint with payload - print_verbose(f"Clickhouse Logger - final response = {response}") + verbose_logger.debug(f"Clickhouse Logger - final response = {response}") except Exception as e: traceback.print_exc() verbose_logger.debug(f"Clickhouse - {str(e)}\n{traceback.format_exc()}") diff --git a/litellm/tests/test_clickhouse_logger.py b/litellm/tests/test_clickhouse_logger.py index ab9a72a38..c8341addb 100644 --- a/litellm/tests/test_clickhouse_logger.py +++ b/litellm/tests/test_clickhouse_logger.py @@ -10,6 +10,8 @@ print("Modified sys.path:", sys.path) from litellm import completion import litellm +from litellm._logging import verbose_logger +import logging litellm.num_retries = 3 @@ -22,6 +24,7 @@ async def test_custom_api_logging(): try: litellm.success_callback = ["clickhouse"] litellm.set_verbose = True + verbose_logger.setLevel(logging.DEBUG) await litellm.acompletion( model="gpt-3.5-turbo", messages=[{"role": "user", "content": f"This is a test"}],