diff --git a/docs/my-website/docs/observability/supabase_integration.md b/docs/my-website/docs/observability/supabase_integration.md
index 6ae4f65da..d9fbc2b5a 100644
--- a/docs/my-website/docs/observability/supabase_integration.md
+++ b/docs/my-website/docs/observability/supabase_integration.md
@@ -22,11 +22,13 @@ create table
     messages json null default '{}'::json,
     response json null default '{}'::json,
     end_user text null default ''::text,
+    status text null default ''::text,
     error json null default '{}'::json,
     response_time real null default '0'::real,
     total_cost real null,
     additional_details json null default '{}'::json,
-    constraint request_logs_pkey primary key (id)
+    litellm_call_id text unique,
+    primary key (id)
   ) tablespace pg_default;
 ```
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 688cd084f..7cbb0e996 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -1,6 +1,6 @@
 import threading
 from typing import Callable, List, Optional
-
+input_callback: List[str] = []
 success_callback: List[str] = []
 failure_callback: List[str] = []
 set_verbose = False
@@ -216,7 +216,6 @@ from .timeout import timeout
 from .testing import *
 from .utils import (
     client,
-    logging,
     exception_type,
     get_optional_params,
     modify_integration,
@@ -224,6 +223,7 @@ from .utils import (
     cost_per_token,
     completion_cost,
     get_litellm_params,
+    Logging
 )
 from .main import *  # type: ignore
 from .integrations import *
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 480251bd5..c998bff4a 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index da220e0b9..e35b110bc 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index ba16f47ba..67de95852 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/integrations/__pycache__/supabase.cpython-311.pyc b/litellm/integrations/__pycache__/supabase.cpython-311.pyc
index 3a77f3a03..43b7b234c 100644
Binary files a/litellm/integrations/__pycache__/supabase.cpython-311.pyc and b/litellm/integrations/__pycache__/supabase.cpython-311.pyc differ
diff --git a/litellm/integrations/supabase.py b/litellm/integrations/supabase.py
index d27277589..edc97b6a3 100644
--- a/litellm/integrations/supabase.py
+++ b/litellm/integrations/supabase.py
@@ -144,6 +144,28 @@ class Supabase:
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

+    def input_log_event(self, model, messages, end_user, litellm_call_id, print_verbose):
+        try:
+            print_verbose(
+                f"Supabase Logging - Enters input logging function for model {model}"
+            )
+            supabase_data_obj = {
+                "model": model,
+                "messages": messages,
+                "end_user": end_user,
+                "status": "initiated",
+                "litellm_call_id": litellm_call_id
+            }
+            data, count = (
+                self.supabase_client.table(self.supabase_table_name)
+                .insert(supabase_data_obj)
+                .execute()
+            )
+            print(f"data: {data}")
+            pass
+        except:
+            pass
+
     def log_event(
         self,
         model,
@@ -152,6 +174,7 @@
         response_obj,
         start_time,
         end_time,
+        litellm_call_id,
         print_verbose,
     ):
         try:
@@ -176,16 +199,20 @@
                     "messages": messages,
                     "response": response_obj["choices"][0]["message"]["content"],
                     "end_user": end_user,
+                    "litellm_call_id": litellm_call_id,
+                    "status": "success"
                 }
                 print_verbose(
                     f"Supabase Logging - final data object: {supabase_data_obj}"
                 )
                 data, count = (
                     self.supabase_client.table(self.supabase_table_name)
-                    .insert(supabase_data_obj)
+                    .upsert(supabase_data_obj)
                     .execute()
                 )
             elif "error" in response_obj:
+                if "Unable to map your input to a model." in response_obj["error"]:
+                    total_cost = 0
                 supabase_data_obj = {
                     "response_time": response_time,
                     "model": response_obj["model"],
@@ -193,13 +220,15 @@
                     "messages": messages,
                     "error": response_obj["error"],
                     "end_user": end_user,
+                    "litellm_call_id": litellm_call_id,
+                    "status": "failure"
                 }
                 print_verbose(
                     f"Supabase Logging - final data object: {supabase_data_obj}"
                 )
                 data, count = (
                     self.supabase_client.table(self.supabase_table_name)
-                    .insert(supabase_data_obj)
+                    .upsert(supabase_data_obj)
                     .execute()
                 )
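For context on how these Supabase hooks get exercised, here is a minimal sketch of enabling the integration from user code, mirroring `litellm/tests/test_supabase_integration.py` further down in this diff. The table name follows the `request_logs_pkey` constraint in the docs SQL, and the `SUPABASE_URL` / `SUPABASE_KEY` environment variables are assumed to be whatever the `Supabase` client class reads when it is constructed:

```python
import litellm
from litellm import completion

# route the three logging phases through the Supabase integration
litellm.input_callback = ["supabase"]    # input_log_event -> row with status "initiated"
litellm.success_callback = ["supabase"]  # log_event -> upsert with status "success"
litellm.failure_callback = ["supabase"]  # log_event -> upsert with status "failure"

# point the integration at the table created by the SQL above
litellm.modify_integration("supabase", {"table_name": "request_logs"})

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
```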
diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 5ebbc640a..8008531e9 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -1,7 +1,6 @@
 import os, json
 from enum import Enum
 import requests
-from litellm import logging
 import time
 from typing import Callable
 from litellm.utils import ModelResponse
@@ -22,11 +21,12 @@ class AnthropicError(Exception):


 class AnthropicLLM:
-    def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
+    def __init__(self, encoding, default_max_tokens_to_sample, logging_obj, api_key=None):
         self.encoding = encoding
         self.default_max_tokens_to_sample = default_max_tokens_to_sample
         self.completion_url = "https://api.anthropic.com/v1/complete"
         self.api_key = api_key
+        self.logging_obj = logging_obj
         self.validate_environment(api_key=api_key)

     def validate_environment(
@@ -84,6 +84,7 @@ class AnthropicLLM:
         }

         ## LOGGING
+        self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
         logging(
             model=model,
             input=prompt,
@@ -101,16 +102,7 @@ class AnthropicLLM:
             return response.iter_lines()
         else:
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                additional_args={
-                    "litellm_params": litellm_params,
-                    "optional_params": optional_params,
-                    "original_response": response.text,
-                },
-                logger_fn=logger_fn,
-            )
+            self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
             print_verbose(f"raw model_response: {response.text}")
             ## RESPONSE OBJECT
             completion_response = response.json()
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index 709c2347e..997c322bd 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -2,7 +2,6 @@
 import os, json
 from enum import Enum
 import requests
-from litellm import logging
 import time
 from typing import Callable
 from litellm.utils import ModelResponse
@@ -19,8 +18,9 @@ class HuggingfaceError(Exception):


 class HuggingfaceRestAPILLM:
-    def __init__(self, encoding, api_key=None) -> None:
+    def __init__(self, encoding, logging_obj, api_key=None) -> None:
         self.encoding = encoding
+        self.logging_obj = logging_obj
         self.validate_environment(api_key=api_key)

     def validate_environment(
@@ -74,18 +74,10 @@ class HuggingfaceRestAPILLM:
             optional_params["max_new_tokens"] = value
         data = {
             "inputs": prompt,
-            # "parameters": optional_params
+            "parameters": optional_params
         }
         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            additional_args={
-                "litellm_params": litellm_params,
-                "optional_params": optional_params,
-            },
-            logger_fn=logger_fn,
-        )
+        self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
         ## COMPLETION CALL
         response = requests.post(
             completion_url, headers=self.headers, data=json.dumps(data)
@@ -94,17 +86,7 @@ class HuggingfaceRestAPILLM:
             return response.iter_lines()
         else:
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                additional_args={
-                    "litellm_params": litellm_params,
-                    "optional_params": optional_params,
-                    "original_response": response.text,
-                },
-                logger_fn=logger_fn,
-            )
-            print_verbose(f"raw model_response: {response.text}")
+            self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
             ## RESPONSE OBJECT
             completion_response = response.json()
             print_verbose(f"response: {completion_response}")
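Both provider classes above now receive the shared logging object from `completion()` instead of importing the old module-level `logging` helper. A minimal sketch of the pattern they follow (the class name and endpoint below are hypothetical, not part of this diff):

```python
import requests


class ExampleRestAPILLM:
    """Hypothetical provider client illustrating the logging_obj pattern used above."""

    def __init__(self, encoding, logging_obj, api_key=None):
        self.encoding = encoding
        self.logging_obj = logging_obj  # a litellm.utils.Logging instance built in completion()
        self.api_key = api_key

    def completion(self, model, prompt):
        data = {"inputs": prompt}
        # record the outbound request before the HTTP call
        self.logging_obj.pre_call(
            input=prompt,
            api_key=self.api_key,
            additional_args={"complete_input_dict": data},
        )
        response = requests.post("https://example.invalid/v1/complete", json=data)
        # record the raw provider response after the call
        self.logging_obj.post_call(
            input=prompt,
            api_key=self.api_key,
            original_response=response.text,
            additional_args={"complete_input_dict": data},
        )
        return response.json()
```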
diff --git a/litellm/main.py b/litellm/main.py
index d3e53f4c1..a01bea4ad 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -6,11 +6,11 @@ from copy import deepcopy
 import litellm
 from litellm import (  # type: ignore
     client,
-    logging,
     exception_type,
     timeout,
     get_optional_params,
     get_litellm_params,
+    Logging
 )
 from litellm.utils import (
     get_secret,
@@ -85,6 +85,7 @@ def completion(
     azure=False,
     custom_llm_provider=None,
     custom_api_base=None,
+    litellm_call_id=None,
     # model specific optional params
     # used by text-bison only
     top_k=40,
@@ -129,8 +130,9 @@ def completion(
         verbose=verbose,
         custom_llm_provider=custom_llm_provider,
         custom_api_base=custom_api_base,
+        litellm_call_id=litellm_call_id
     )
-
+    logging = Logging(model=model, messages=messages, optional_params=optional_params, litellm_params=litellm_params)
     if custom_llm_provider == "azure":  # azure configs
         openai.api_type = "azure"
@@ -144,16 +146,14 @@
             if litellm.api_version is not None
             else get_secret("AZURE_API_VERSION")
         )
+        if not api_key and litellm.azure_key:
+            api_key = litellm.azure_key
+        elif not api_key and get_secret("AZURE_API_KEY"):
+            api_key = get_secret("AZURE_API_KEY")
         # set key
-        openai.api_key = api_key or litellm.azure_key or get_secret("AZURE_API_KEY")
+        openai.api_key = api_key
         ## LOGGING
-        logging(
-            model=model,
-            input=messages,
-            additional_args=optional_params,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=messages, api_key=openai.api_key, additional_args={"headers": litellm.headers, "api_version": openai.api_version, "api_base": openai.api_base})
         ## COMPLETION CALL
         if litellm.headers:
             response = openai.ChatCompletion.create(
@@ -166,6 +166,8 @@
             response = openai.ChatCompletion.create(
                 model=model, messages=messages, **optional_params
             )
+        ## LOGGING
+        logging.post_call(input=messages, api_key=openai.api_key, original_response=response, additional_args={"headers": litellm.headers, "api_version": openai.api_version, "api_base": openai.api_base})
     elif (
         model in litellm.open_ai_chat_completion_models
         or custom_llm_provider == "custom_openai"
@@ -182,18 +184,15 @@
         if litellm.organization:
             openai.organization = litellm.organization
         # set API KEY
-        openai.api_key = (
-            api_key or litellm.openai_key or get_secret("OPENAI_API_KEY")
-        )
+        if not api_key and litellm.openai_key:
+            api_key = litellm.openai_key
+        elif not api_key and get_secret("OPENAI_API_KEY"):
+            api_key = get_secret("OPENAI_API_KEY")
+
+        openai.api_key = api_key
         ## LOGGING
-        logging(
-            model=model,
-            input=messages,
-            additional_args=args,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=messages, api_key=api_key, additional_args={"headers": litellm.headers, "api_base": api_base})
         ## COMPLETION CALL
         if litellm.headers:
             response = openai.ChatCompletion.create(
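The key handling above resolves to the first value that is set: an explicit `api_key` argument, then the module-level `litellm.openai_key` (or `litellm.azure_key`), then the environment secret. A hedged sketch of that precedence as a standalone helper (the helper name is illustrative, not part of litellm):

```python
import os


def resolve_api_key(api_key, module_key, env_var):
    """Illustrative only: explicit argument first, then litellm.<provider>_key,
    then the environment variable (as get_secret() does for env-backed secrets)."""
    if api_key:
        return api_key
    if module_key:
        return module_key
    return os.environ.get(env_var)


# e.g. for the OpenAI chat branch above:
# openai.api_key = resolve_api_key(api_key, litellm.openai_key, "OPENAI_API_KEY")
```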
@@ -206,6 +205,8 @@
             response = openai.ChatCompletion.create(
                 model=model, messages=messages, **optional_params
             )
+        ## LOGGING
+        logging.post_call(input=messages, api_key=api_key, original_response=response, additional_args={"headers": litellm.headers})
     elif model in litellm.open_ai_text_completion_models:
         openai.api_type = "openai"
         openai.api_base = (
@@ -214,20 +215,19 @@
             if litellm.api_base is not None
             else "https://api.openai.com/v1"
         )
         openai.api_version = None
-        openai.api_key = (
-            api_key or litellm.openai_key or get_secret("OPENAI_API_KEY")
-        )
+        # set API KEY
+        if not api_key and litellm.openai_key:
+            api_key = litellm.openai_key
+        elif not api_key and get_secret("OPENAI_API_KEY"):
+            api_key = get_secret("OPENAI_API_KEY")
+
+        openai.api_key = api_key
+
         if litellm.organization:
             openai.organization = litellm.organization
         prompt = " ".join([message["content"] for message in messages])
         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            additional_args=optional_params,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=prompt, api_key=api_key, additional_args={"openai_organization": litellm.organization, "headers": litellm.headers, "api_base": openai.api_base, "api_type": openai.api_type})
         ## COMPLETION CALL
         if litellm.headers:
             response = openai.Completion.create(
@@ -237,19 +237,10 @@
             )
         else:
             response = openai.Completion.create(model=model, prompt=prompt)
-        completion_response = response["choices"][0]["text"]
         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={
-                "max_tokens": max_tokens,
-                "original_response": completion_response,
-            },
-            logger_fn=logger_fn,
-        )
+        logging.post_call(input=prompt, api_key=api_key, original_response=response, additional_args={"openai_organization": litellm.organization, "headers": litellm.headers, "api_base": openai.api_base, "api_type": openai.api_type})
         ## RESPONSE OBJECT
+        completion_response = response["choices"][0]["text"]
         model_response["choices"][0]["message"]["content"] = completion_response
         model_response["created"] = response["created"]
         model_response["model"] = model
@@ -278,13 +269,7 @@
             input["max_length"] = max_tokens  # for t5 models
             input["max_new_tokens"] = max_tokens  # for llama2 models
         ## LOGGING
-        logging(
-            model=model,
-            input=input,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={"max_tokens": max_tokens},
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=prompt, api_key=replicate_key, additional_args={"complete_input_dict": input, "max_tokens": max_tokens})
         ## COMPLETION CALL
         output = replicate.run(model, input=input)
         if "stream" in optional_params and optional_params["stream"] == True:
@@ -297,16 +282,8 @@
                 response += item
             completion_response = response
         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={
-                "max_tokens": max_tokens,
-                "original_response": completion_response,
-            },
-            logger_fn=logger_fn,
-        )
+        logging.post_call(input=prompt, api_key=replicate_key, original_response=completion_response, additional_args={"complete_input_dict": input, "max_tokens": max_tokens})
+        ## USAGE
         prompt_tokens = len(encoding.encode(prompt))
         completion_tokens = len(encoding.encode(completion_response))
         ## RESPONSE OBJECT
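The `## USAGE` blocks above count tokens with the shared `encoding` object so each provider branch can fill the same usage shape. A small sketch, assuming a tiktoken encoding like the one litellm builds at import time:

```python
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")  # assumption: litellm's default encoding


def usage_block(prompt: str, completion_text: str) -> dict:
    # mirrors the prompt_tokens / completion_tokens counting in the branches above
    prompt_tokens = len(encoding.encode(prompt))
    completion_tokens = len(encoding.encode(completion_text))
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }
```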
@@ -327,6 +304,7 @@
             encoding=encoding,
             default_max_tokens_to_sample=litellm.max_tokens,
             api_key=anthropic_key,
+            logging_obj = logging # model call logging done inside the class as we may need to modify I/O to fit anthropic's requirements
         )
         model_response = anthropic_client.completion(
             model=model,
@@ -362,13 +340,7 @@
                 "OR_API_KEY"
             )
         ## LOGGING
-        logging(
-            model=model,
-            input=messages,
-            additional_args=optional_params,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=messages, api_key=openai.api_key)
         ## COMPLETION CALL
         if litellm.headers:
             response = openai.ChatCompletion.create(
@@ -395,6 +367,8 @@
                 },
                 **optional_params,
             )
+        ## LOGGING
+        logging.post_call(input=messages, api_key=openai.api_key, original_response=response)
     elif model in litellm.cohere_models:
         # import cohere/if it fails then pip install cohere
         install_and_import("cohere")
@@ -409,31 +383,17 @@
         co = cohere.Client(cohere_key)
         prompt = " ".join([message["content"] for message in messages])
         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=prompt, api_key=cohere_key)
         ## COMPLETION CALL
         response = co.generate(model=model, prompt=prompt, **optional_params)
         if "stream" in optional_params and optional_params["stream"] == True:
             # don't try to access stream object,
             response = CustomStreamWrapper(response, model)
             return response
-
-        completion_response = response[0].text
         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={
-                "max_tokens": max_tokens,
-                "original_response": completion_response,
-            },
-            logger_fn=logger_fn,
-        )
+        logging.post_call(input=prompt, api_key=cohere_key, original_response=response)
+        ## USAGE
+        completion_response = response[0].text
         prompt_tokens = len(encoding.encode(prompt))
         completion_tokens = len(encoding.encode(completion_response))
         ## RESPONSE OBJECT
@@ -457,7 +417,7 @@
             or os.environ.get("HUGGINGFACE_API_KEY")
         )
         huggingface_client = HuggingfaceRestAPILLM(
-            encoding=encoding, api_key=huggingface_key
+            encoding=encoding, api_key=huggingface_key, logging_obj=logging
         )
         model_response = huggingface_client.completion(
             model=model,
@@ -492,12 +452,7 @@
         )  # TODO: Add chat support for together AI

         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=prompt, api_key=TOGETHER_AI_TOKEN)
         if stream == True:
             return together_ai_completion_streaming(
                 {
@@ -519,17 +474,7 @@
             headers=headers,
         )
         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={
-                "max_tokens": max_tokens,
-                "original_response": res.text,
-            },
-            logger_fn=logger_fn,
-        )
-
+        logging.post_call(input=prompt, api_key=TOGETHER_AI_TOKEN, original_response=res.text)
         # make this safe for reading, if output does not exist raise an error
         json_response = res.json()
         if "output" not in json_response:
@@ -562,16 +507,7 @@

         prompt = " ".join([message["content"] for message in messages])
         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={
-                "litellm_params": litellm_params,
-                "optional_params": optional_params,
-            },
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=prompt, api_key=None)

         chat_model = ChatModel.from_pretrained(model)

@@ -579,16 +515,7 @@
         completion_response = chat.send_message(prompt, **optional_params)

         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={
-                "max_tokens": max_tokens,
-                "original_response": completion_response,
-            },
-            logger_fn=logger_fn,
-        )
+        logging.post_call(input=prompt, api_key=None, original_response=completion_response)

         ## RESPONSE OBJECT
         model_response["choices"][0]["message"]["content"] = completion_response
@@ -607,27 +534,13 @@
         prompt = " ".join([message["content"] for message in messages])

         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=prompt, api_key=None)
+
         vertex_model = TextGenerationModel.from_pretrained(model)
         completion_response = vertex_model.predict(prompt, **optional_params)

         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={
-                "max_tokens": max_tokens,
-                "original_response": completion_response,
-            },
-            logger_fn=logger_fn,
-        )
-
+        logging.post_call(input=prompt, api_key=None, original_response=completion_response)
         ## RESPONSE OBJECT
         model_response["choices"][0]["message"]["content"] = completion_response
         model_response["created"] = time.time()
@@ -641,12 +554,7 @@
         prompt = " ".join([message["content"] for message in messages])

         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=prompt, api_key=ai21.api_key)

         ai21_response = ai21.Completion.execute(
             model=model,
@@ -655,16 +563,7 @@
         completion_response = ai21_response["completions"][0]["data"]["text"]

         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={
-                "max_tokens": max_tokens,
-                "original_response": completion_response,
-            },
-            logger_fn=logger_fn,
-        )
+        logging.post_call(input=prompt, api_key=ai21.api_key, original_response=completion_response)

         ## RESPONSE OBJECT
         model_response["choices"][0]["message"]["content"] = completion_response
@@ -678,7 +577,8 @@
         prompt = " ".join([message["content"] for message in messages])

         ## LOGGING
-        logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
+        logging.pre_call(input=prompt, api_key=None, additional_args={"endpoint": endpoint})
+
         generator = get_ollama_response_stream(endpoint, model, prompt)
         # assume all responses are streamed
         return generator
@@ -693,12 +593,7 @@
         prompt = " ".join([message["content"] for message in messages])

         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=prompt, api_key=base_ten_key)

         base_ten__model = baseten.deployed_model_version_id(model)

@@ -708,16 +603,8 @@
         if type(completion_response) == dict:
             completion_response = completion_response["generated_text"]

-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={
-                "max_tokens": max_tokens,
-                "original_response": completion_response,
-            },
-            logger_fn=logger_fn,
-        )
+        ## LOGGING
+        logging.post_call(input=prompt, api_key=base_ten_key, original_response=completion_response)

         ## RESPONSE OBJECT
         model_response["choices"][0]["message"]["content"] = completion_response
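Every branch above now funnels its details through `logging.pre_call` / `logging.post_call`, which forward the accumulated `model_call_details` dict to any custom logger the caller supplied. A hedged example of observing those details from user code, assuming `logger_fn` is still accepted as a keyword argument of `completion()` as the removed calls above imply:

```python
import litellm
from litellm import completion


def my_logger(model_call_details: dict):
    # pre_call and post_call both hand this same dict to the user's function;
    # post_call additionally carries "original_response".
    print(model_call_details.get("model"), sorted(model_call_details.keys()))


litellm.set_verbose = True
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    logger_fn=my_logger,
)
```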
@@ -734,26 +621,14 @@
         prompt = " ".join([message["content"] for message in messages])

         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=prompt, api_key=None, additional_args={"url": url, "max_new_tokens": 100})
+
         response = requests.post(
             url, data={"inputs": prompt, "max_new_tokens": 100, "model": model}
         )
         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={
-                "max_tokens": max_tokens,
-                "original_response": response,
-            },
-            logger_fn=logger_fn,
-        )
+        logging.post_call(input=prompt, api_key=None, original_response=response.text, additional_args={"url": url, "max_new_tokens": 100})
+
         completion_response = response.json()["outputs"]

         # RESPONSE OBJECT
@@ -762,13 +637,6 @@
         model_response["model"] = model
         response = model_response
     else:
-        ## LOGGING
-        logging(
-            model=model,
-            input=messages,
-            custom_llm_provider=custom_llm_provider,
-            logger_fn=logger_fn,
-        )
         args = locals()
         raise ValueError(
             f"Unable to map your input to a model. Check your input - {args}"
@@ -776,14 +644,7 @@
         return response
     except Exception as e:
         ## LOGGING
-        logging(
-            model=model,
-            input=messages,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={"max_tokens": max_tokens},
-            logger_fn=logger_fn,
-            exception=e,
-        )
+        logging.post_call(input=messages, api_key=api_key, original_response=e)
         ## Map to OpenAI Exception
         raise exception_type(
             model=model, custom_llm_provider=custom_llm_provider, original_exception=e
@@ -825,7 +686,7 @@ def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
         openai.api_version = get_secret("AZURE_API_VERSION")
         openai.api_key = get_secret("AZURE_API_KEY")
         ## LOGGING
-        logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
+        logging.pre_call(model=model, input=input, azure=azure, logger_fn=logger_fn)
         ## EMBEDDING CALL
         response = openai.Embedding.create(input=input, engine=model)
         print_verbose(f"response_value: {str(response)[:50]}")
diff --git a/litellm/tests/test_supabase_integration.py b/litellm/tests/test_supabase_integration.py
index 882d0bbc6..2326bcfdf 100644
--- a/litellm/tests/test_supabase_integration.py
+++ b/litellm/tests/test_supabase_integration.py
@@ -1,27 +1,28 @@
-# #### What this tests ####
-# # This tests if logging to the helicone integration actually works
-# # pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
-# import sys, os
-# import traceback
-# import pytest
+#### What this tests ####
+# This tests if logging to the supabase integration actually works
+# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
+import sys, os
+import traceback
+import pytest

-# sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
-# import litellm
-# from litellm import embedding, completion
+sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
+import litellm
+from litellm import embedding, completion

-# litellm.success_callback = ["supabase"]
-# litellm.failure_callback = ["supabase"]
+litellm.input_callback = ["supabase"]
+litellm.success_callback = ["supabase"]
+litellm.failure_callback = ["supabase"]

-# litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
+litellm.modify_integration("supabase",{"table_name": "test_table"})

-# litellm.set_verbose = True
+litellm.set_verbose = True

-# user_message = "Hello, how are you?"
-# messages = [{ "content": user_message,"role": "user"}]
+user_message = "Hello, how are you?"
+messages = [{ "content": user_message,"role": "user"}]

-# #openai call
-# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
+#openai call
+response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

-# #bad request call
-# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])
+#bad request call
+response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])
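With the `litellm_call_id text unique` column from the docs change, the intent is for each request in this test to leave a single row behind: the good call should end at `status = "success"` and the bad-request call at `"failure"`, both traceable back to the `"initiated"` insert made by `input_log_event`. A hedged sketch of checking that with supabase-py (table name, credentials, and the placeholder id are assumptions):

```python
import os
from supabase import create_client

client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])

# requests that never made it past the input log would still be "initiated"
stuck = (
    client.table("test_table")
    .select("litellm_call_id, model, status")
    .eq("status", "initiated")
    .execute()
)
print(stuck.data)

# look up a single request by its litellm_call_id
row = (
    client.table("test_table")
    .select("*")
    .eq("litellm_call_id", "<some-uuid>")  # hypothetical id from a previous run
    .execute()
)
print(row.data)
```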
"Hello, how are you?" -# messages = [{ "content": user_message,"role": "user"}] +user_message = "Hello, how are you?" +messages = [{ "content": user_message,"role": "user"}] -# #openai call -# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) +#openai call +response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) -# #bad request call -# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}]) +#bad request call +response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}]) diff --git a/litellm/utils.py b/litellm/utils.py index 5346ce62a..d340c2df3 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -135,48 +135,105 @@ def install_and_import(package: str): ####### LOGGING ################### # Logging function -> log the exact model details + what's being sent | Non-Blocking -def logging( - model=None, - input=None, - custom_llm_provider=None, - azure=False, +class Logging: + def __init__(self, model, messages, optional_params, litellm_params): + self.model = model + self.messages = messages + self.optional_params = optional_params + self.litellm_params = litellm_params + self.logger_fn = litellm_params["logger_fn"] + self.model_call_details = { + "model": model, + "messages": messages, + "optional_params": self.optional_params, + "litellm_params": self.litellm_params, + } + + def pre_call(self, input, api_key, additional_args={}): + try: + print(f"logging pre call for model: {self.model}") + self.model_call_details["input"] = input + self.model_call_details["api_key"] = api_key + self.model_call_details["additional_args"] = additional_args + + ## User Logging -> if you pass in a custom logging function + print_verbose( + f"Logging Details: logger_fn - {self.logger_fn} | callable(logger_fn) - {callable(self.logger_fn)}" + ) + if self.logger_fn and callable(self.logger_fn): + try: + self.logger_fn( + self.model_call_details + ) # Expectation: any logger function passed in by the user should accept a dict object + except Exception as e: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" + ) + + ## Input Integration Logging -> If you want to log the fact that an attempt to call the model was made + for callback in litellm.input_callback: + try: + if callback == "supabase": + print_verbose("reaches supabase for logging!") + model = self.model + messages = self.messages + print(f"litellm._thread_context: {litellm._thread_context}") + supabaseClient.input_log_event( + model=model, + messages=messages, + end_user=litellm._thread_context.user, + litellm_call_id=self.litellm_params["litellm_call_id"], + print_verbose=print_verbose, + ) + pass + except: + pass + except: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" + ) + pass + + def post_call(self, input, api_key, original_response, additional_args={}): + # Do something here + try: + self.model_call_details["input"] = input + self.model_call_details["api_key"] = api_key + self.model_call_details["original_response"] = original_response + self.model_call_details["additional_args"] = additional_args + + ## User Logging -> if you pass in a custom logging function + print_verbose( + f"Logging Details: logger_fn - {self.logger_fn} | callable(logger_fn) - {callable(self.logger_fn)}" + ) + if self.logger_fn and 
@@ -206,10 +263,10 @@ def client(original_function):
         try:
             global callback_list, add_breadcrumb, user_logger_fn
             if (
-                len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0
+                len(litellm.input_callback) > 0 or len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0
             ) and len(callback_list) == 0:
                 callback_list = list(
-                    set(litellm.success_callback + litellm.failure_callback)
+                    set(litellm.input_callback + litellm.success_callback + litellm.failure_callback)
                 )
                 set_callbacks(
                     callback_list=callback_list,
@@ -299,13 +356,16 @@ def client(original_function):
         result = None
         try:
             function_setup(*args, **kwargs)
-            ## MODEL CALL
+            litellm_call_id = str(uuid.uuid4())
+            kwargs["litellm_call_id"] = litellm_call_id
+            ## [OPTIONAL] CHECK CACHE
             start_time = datetime.datetime.now()
             if (litellm.caching or litellm.caching_with_models) and (
                 cached_result := check_cache(*args, **kwargs)
             ) is not None:
                 result = cached_result
             else:
+                ## MODEL CALL
                 result = original_function(*args, **kwargs)
             end_time = datetime.datetime.now()
             ## Add response to CACHE
@@ -399,6 +459,7 @@ def get_litellm_params(
     together_ai=False,
     custom_llm_provider=None,
     custom_api_base=None,
+    litellm_call_id=None,
 ):
     litellm_params = {
         "return_async": return_async,
@@ -408,6 +469,7 @@
         "verbose": verbose,
         "custom_llm_provider": custom_llm_provider,
         "custom_api_base": custom_api_base,
+        "litellm_call_id": litellm_call_id
     }

     return litellm_params
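Taken together, the hunks above define how the new call id travels: the `client()` wrapper mints it with `uuid.uuid4()`, `completion()` threads it into `litellm_params`, and the Supabase callbacks key their rows on it. A condensed sketch, assuming the remaining `get_litellm_params` arguments keep their defaults:

```python
import uuid
from litellm.utils import get_litellm_params

litellm_call_id = str(uuid.uuid4())  # what the client() wrapper injects into kwargs
litellm_params = get_litellm_params(
    logger_fn=None,
    litellm_call_id=litellm_call_id,
)
# the id then rides along to Logging.pre_call / input_log_event ("initiated")
# and to handle_success / handle_failure for the upsert to "success" / "failure"
print(litellm_params["litellm_call_id"])
```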
@@ -567,7 +629,8 @@ def set_callbacks(callback_list):
     global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient
     try:
         for callback in callback_list:
-            if callback == "sentry" or "SENTRY_API_URL" in os.environ:
+            print(f"callback: {callback}")
+            if callback == "sentry":
                 try:
                     import sentry_sdk
                 except ImportError:
@@ -623,6 +686,7 @@ def set_callbacks(callback_list):
             elif callback == "berrispend":
                 berrispendLogger = BerriSpendLogger()
             elif callback == "supabase":
+                print(f"instantiating supabase")
                 supabaseClient = Supabase()
     except Exception as e:
         raise e
@@ -743,7 +807,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
                         "completion_tokens": 0,
                     },
                 }
-                print(f"litellm._thread_context: {litellm._thread_context}")
                 supabaseClient.log_event(
                     model=model,
                     messages=messages,
@@ -751,9 +814,9 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
                     response_obj=result,
                     start_time=start_time,
                     end_time=end_time,
+                    litellm_call_id=kwargs["litellm_call_id"],
                     print_verbose=print_verbose,
                 )
-
     except:
         print_verbose(
             f"Error Occurred while logging failure: {traceback.format_exc()}"
@@ -769,7 +832,7 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
         pass
     except Exception as e:
         ## LOGGING
-        logging(logger_fn=user_logger_fn, exception=e)
+        exception_logging(logger_fn=user_logger_fn, exception=e)
         pass


@@ -849,11 +912,12 @@ def handle_success(args, kwargs, result, start_time, end_time):
                     response_obj=result,
                     start_time=start_time,
                     end_time=end_time,
+                    litellm_call_id=kwargs["litellm_call_id"],
                     print_verbose=print_verbose,
                 )
         except Exception as e:
             ## LOGGING
-            logging(logger_fn=user_logger_fn, exception=e)
+            exception_logging(logger_fn=user_logger_fn, exception=e)
             print_verbose(
                 f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}"
             )
@@ -864,7 +928,7 @@ def handle_success(args, kwargs, result, start_time, end_time):
         pass
     except Exception as e:
         ## LOGGING
-        logging(logger_fn=user_logger_fn, exception=e)
+        exception_logging(logger_fn=user_logger_fn, exception=e)
         print_verbose(
             f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}"
         )
@@ -912,15 +976,6 @@ def exception_type(model, original_exception, custom_llm_provider):
         exception_type = type(original_exception).__name__
     else:
         exception_type = ""
-    logging(
-        model=model,
-        additional_args={
-            "error_str": error_str,
-            "exception_type": exception_type,
-            "original_exception": original_exception,
-        },
-        logger_fn=user_logger_fn,
-    )
     if "claude" in model:  # one of the anthropics
         if hasattr(original_exception, "status_code"):
             print_verbose(f"status_code: {original_exception.status_code}")
@@ -1030,7 +1085,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise original_exception
     except Exception as e:
         ## LOGGING
-        logging(
+        exception_logging(
             logger_fn=user_logger_fn,
             additional_args={
                 "exception_mapping_worked": exception_mapping_worked,