diff --git a/litellm/integrations/litedebugger.py b/litellm/integrations/litedebugger.py
index 5187d555f8..314fb7568b 100644
--- a/litellm/integrations/litedebugger.py
+++ b/litellm/integrations/litedebugger.py
@@ -1,5 +1,5 @@
 import requests, traceback, json, os
-
+import types
 class LiteDebugger:
     user_email = None
@@ -7,13 +7,12 @@ class LiteDebugger:
     def __init__(self, email=None):
         self.api_url = "https://api.litellm.ai/debugger"
-        # self.api_url = "http://0.0.0.0:4000/debugger"
         self.validate_environment(email)
         pass
     def validate_environment(self, email):
         try:
-            self.user_email = os.getenv("LITELLM_EMAIL") or email
+            self.user_email = (email or os.getenv("LITELLM_TOKEN") or os.getenv("LITELLM_EMAIL"))
             self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
             try:
                 print(
@@ -23,11 +22,11 @@ class LiteDebugger:
             print(f"Here's your LiteLLM Dashboard 👉 {self.dashboard_url}")
             if self.user_email == None:
                 raise Exception(
-                    "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_EMAIL. Set it in your environment. Eg.: os.environ['LITELLM_EMAIL']= "
+                    "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= "
                 )
         except Exception as e:
             raise ValueError(
-                "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_EMAIL. Set it in your environment. Eg.: os.environ['LITELLM_EMAIL']= "
+                "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. 
Eg.: os.environ['LITELLM_TOKEN']= " ) def input_log_event( @@ -36,6 +35,7 @@ class LiteDebugger: messages, end_user, litellm_call_id, + call_type, print_verbose, litellm_params, optional_params, @@ -52,39 +52,76 @@ class LiteDebugger: updated_litellm_params = remove_key_value(litellm_params, "logger_fn") - litellm_data_obj = { - "model": model, - "messages": messages, - "end_user": end_user, - "status": "initiated", - "litellm_call_id": litellm_call_id, - "user_email": self.user_email, - "litellm_params": updated_litellm_params, - "optional_params": optional_params, - } - print_verbose( - f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}" - ) - response = requests.post( - url=self.api_url, - headers={"content-type": "application/json"}, - data=json.dumps(litellm_data_obj), - ) - print_verbose(f"LiteDebugger: api response - {response.text}") + if call_type == "embedding": + for message in messages: # assuming the input is a list as required by the embedding function + litellm_data_obj = { + "model": model, + "messages": [{"role": "user", "content": message}], + "end_user": end_user, + "status": "initiated", + "litellm_call_id": litellm_call_id, + "user_email": self.user_email, + "litellm_params": updated_litellm_params, + "optional_params": optional_params, + } + print_verbose( + f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}" + ) + response = requests.post( + url=self.api_url, + headers={"content-type": "application/json"}, + data=json.dumps(litellm_data_obj), + ) + print_verbose(f"LiteDebugger: embedding api response - {response.text}") + elif call_type == "completion": + litellm_data_obj = { + "model": model, + "messages": messages if isinstance(messages, list) else [{"role": "user", "content": messages}], + "end_user": end_user, + "status": "initiated", + "litellm_call_id": litellm_call_id, + "user_email": self.user_email, + "litellm_params": updated_litellm_params, + "optional_params": optional_params, + } + print_verbose( + f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}" + ) + response = requests.post( + url=self.api_url, + headers={"content-type": "application/json"}, + data=json.dumps(litellm_data_obj), + ) + print_verbose(f"LiteDebugger: completion api response - {response.text}") except: print_verbose( f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}" ) pass - def post_call_log_event(self, original_response, litellm_call_id, print_verbose): + def post_call_log_event(self, original_response, litellm_call_id, print_verbose, call_type, stream): try: - litellm_data_obj = { - "status": "received", - "additional_details": {"original_response": original_response}, - "litellm_call_id": litellm_call_id, - "user_email": self.user_email, - } + if call_type == "embedding": + litellm_data_obj = { + "status": "received", + "additional_details": {"original_response": str(original_response["data"][0]["embedding"][:5])}, # don't store the entire vector + "litellm_call_id": litellm_call_id, + "user_email": self.user_email, + } + elif call_type == "completion" and not stream: + litellm_data_obj = { + "status": "received", + "additional_details": {"original_response": original_response}, + "litellm_call_id": litellm_call_id, + "user_email": self.user_email, + } + elif call_type == "completion" and stream: + litellm_data_obj = { + "status": "received", + "additional_details": {"original_response": "Streamed response" if isinstance(original_response, types.GeneratorType) else original_response}, + "litellm_call_id": 
litellm_call_id, + "user_email": self.user_email, + } response = requests.post( url=self.api_url, headers={"content-type": "application/json"}, @@ -98,32 +135,28 @@ class LiteDebugger: def log_event( self, - model, - messages, end_user, response_obj, start_time, end_time, litellm_call_id, print_verbose, + call_type, + stream = False ): try: print_verbose( - f"LiteLLMDebugger: Logging - Enters handler logging function for model {model} with response object {response_obj}" + f"LiteLLMDebugger: Logging - Enters handler logging function for function {call_type} and stream set to {stream} with response object {response_obj}" ) total_cost = 0 # [TODO] implement cost tracking response_time = (end_time - start_time).total_seconds() - if "choices" in response_obj: + if call_type == "completion" and stream == False: litellm_data_obj = { "response_time": response_time, - "model": response_obj["model"], "total_cost": total_cost, - "messages": messages, - "response": response["choices"][0]["message"]["content"], - "end_user": end_user, + "response": response_obj["choices"][0]["message"]["content"], "litellm_call_id": litellm_call_id, "status": "success", - "user_email": self.user_email, } print_verbose( f"LiteDebugger: Logging - final data object: {litellm_data_obj}" @@ -133,45 +166,26 @@ class LiteDebugger: headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj), ) - elif ( - "data" in response_obj - and isinstance(response_obj["data"], list) - and len(response_obj["data"]) > 0 - and "embedding" in response_obj["data"][0] - ): - print(f"messages: {messages}") + elif call_type == "embedding": litellm_data_obj = { "response_time": response_time, - "model": response_obj["model"], "total_cost": total_cost, - "messages": messages, "response": str(response_obj["data"][0]["embedding"][:5]), - "end_user": end_user, "litellm_call_id": litellm_call_id, "status": "success", - "user_email": self.user_email, } - print_verbose( - f"LiteDebugger: Logging - final data object: {litellm_data_obj}" - ) response = requests.post( url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj), ) - elif ( - isinstance(response_obj, object) - and response_obj.__class__.__name__ == "CustomStreamWrapper" - ): + elif call_type == "completion" and stream == True: litellm_data_obj = { "response_time": response_time, "total_cost": total_cost, - "messages": messages, - "response": "Streamed response", - "end_user": end_user, + "response": "streamed response", "litellm_call_id": litellm_call_id, "status": "success", - "user_email": self.user_email, } print_verbose( f"LiteDebugger: Logging - final data object: {litellm_data_obj}" @@ -188,7 +202,6 @@ class LiteDebugger: "response_time": response_time, "model": response_obj["model"], "total_cost": total_cost, - "messages": messages, "error": response_obj["error"], "end_user": end_user, "litellm_call_id": litellm_call_id, diff --git a/litellm/llms/ai21.py b/litellm/llms/ai21.py index 9b856be4c7..6a22b99e89 100644 --- a/litellm/llms/ai21.py +++ b/litellm/llms/ai21.py @@ -31,7 +31,7 @@ class AI21LLM: # set the api key if self.api_key == None: raise ValueError( - "Missing Baseten API Key - A call is being made to baseten but no key is set either in the environment variables or via params" + "Missing AI21 API Key - A call is being made to ai21 but no key is set either in the environment variables or via params" ) self.api_key = api_key self.headers = { diff --git a/litellm/main.py b/litellm/main.py index f20f173ff7..9ab017bbbd 
100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -92,6 +92,7 @@ def completion( custom_llm_provider=None, custom_api_base=None, litellm_call_id=None, + litellm_logging_obj=None, # model specific optional params # used by text-bison only top_k=40, @@ -100,6 +101,7 @@ def completion( ) -> ModelResponse: args = locals() try: + logging = litellm_logging_obj if fallbacks != []: return completion_with_fallbacks(**args) if litellm.model_alias_map and model in litellm.model_alias_map: @@ -151,12 +153,7 @@ def completion( litellm_call_id=litellm_call_id, model_alias_map=litellm.model_alias_map, ) - logging = Logging( - model=model, - messages=messages, - optional_params=optional_params, - litellm_params=litellm_params, - ) + logging.update_environment_variables(optional_params=optional_params, litellm_params=litellm_params) if custom_llm_provider == "azure": # azure configs openai.api_type = "azure" @@ -306,7 +303,7 @@ def completion( response = openai.Completion.create(model=model, prompt=prompt, **optional_params) if "stream" in optional_params and optional_params["stream"] == True: - response = CustomStreamWrapper(response, model) + response = CustomStreamWrapper(response, model, logging_obj=logging) return response ## LOGGING logging.post_call( @@ -363,7 +360,7 @@ def completion( if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, # let the stream handler know this is replicate - response = CustomStreamWrapper(output, "replicate") + response = CustomStreamWrapper(output, "replicate", logging_obj=logging) return response response = "" for item in output: @@ -413,7 +410,7 @@ def completion( ) if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, - response = CustomStreamWrapper(model_response, model) + response = CustomStreamWrapper(model_response, model, logging_obj=logging) return response response = model_response elif model in litellm.openrouter_models or custom_llm_provider == "openrouter": @@ -486,7 +483,7 @@ def completion( response = co.generate(model=model, prompt=prompt, **optional_params) if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, - response = CustomStreamWrapper(response, model) + response = CustomStreamWrapper(response, model, logging_obj=logging) return response ## LOGGING logging.post_call( @@ -532,7 +529,7 @@ def completion( if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, response = CustomStreamWrapper( - model_response, model, custom_llm_provider="huggingface" + model_response, model, custom_llm_provider="huggingface", logging_obj=logging ) return response response = model_response @@ -572,7 +569,7 @@ def completion( headers=headers, ) response = CustomStreamWrapper( - res.iter_lines(), model, custom_llm_provider="together_ai" + res.iter_lines(), model, custom_llm_provider="together_ai", logging_obj=logging ) return response else: @@ -689,7 +686,7 @@ def completion( if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, response = CustomStreamWrapper( - model_response, model, custom_llm_provider="ai21" + model_response, model, custom_llm_provider="ai21", logging_obj=logging ) return response @@ -732,7 +729,7 @@ def completion( if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, response = CustomStreamWrapper( - model_response, model, 
custom_llm_provider="baseten" + model_response, model, custom_llm_provider="baseten", logging_obj=logging ) return response response = model_response @@ -775,8 +772,6 @@ def completion( ) return response except Exception as e: - ## LOGGING - logging.post_call(input=messages, api_key=api_key, original_response=e) ## Map to OpenAI Exception raise exception_type( model=model, custom_llm_provider=custom_llm_provider, original_exception=e @@ -816,21 +811,12 @@ def batch_completion(*args, **kwargs): 60 ) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout` def embedding( - model, input=[], azure=False, force_timeout=60, litellm_call_id=None, logger_fn=None + model, input=[], azure=False, force_timeout=60, litellm_call_id=None, litellm_logging_obj=None, logger_fn=None ): try: response = None - logging = Logging( - model=model, - messages=input, - optional_params={}, - litellm_params={ - "azure": azure, - "force_timeout": force_timeout, - "logger_fn": logger_fn, - "litellm_call_id": litellm_call_id, - }, - ) + logging = litellm_logging_obj + logging.update_environment_variables(optional_params={}, litellm_params={"force_timeout": force_timeout, "azure": azure, "litellm_call_id": litellm_call_id, "logger_fn": logger_fn}) if azure == True: # azure configs openai.api_type = "azure" @@ -849,7 +835,6 @@ def embedding( ) ## EMBEDDING CALL response = openai.Embedding.create(input=input, engine=model) - print_verbose(f"response_value: {str(response)[:100]}") elif model in litellm.open_ai_embedding_models: openai.api_type = "openai" openai.api_base = "https://api.openai.com/v1" @@ -867,15 +852,13 @@ def embedding( ) ## EMBEDDING CALL response = openai.Embedding.create(input=input, model=model) - print_verbose(f"response_value: {str(response)[:100]}") else: args = locals() raise ValueError(f"No valid embedding model args passed in - {args}") - + ## LOGGING + logging.post_call(input=input, api_key=openai.api_key, original_response=response) return response except Exception as e: - ## LOGGING - logging.post_call(input=input, api_key=openai.api_key, original_response=e) ## Map to OpenAI Exception raise exception_type( model=model, diff --git a/litellm/tests/test_litedebugger_integration.py b/litellm/tests/test_litedebugger_integration.py index e40e694fe6..3eca24361c 100644 --- a/litellm/tests/test_litedebugger_integration.py +++ b/litellm/tests/test_litedebugger_integration.py @@ -1,24 +1,30 @@ -# #### What this tests #### -# # This tests if logging to the litedebugger integration actually works -# # pytest mistakes intentional bad calls as failed tests -> [TODO] fix this -# import sys, os -# import traceback -# import pytest +#### What this tests #### +# This tests if logging to the litedebugger integration actually works +# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this +import sys, os +import traceback +import pytest -# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path -# import litellm -# from litellm import embedding, completion +sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path +import litellm +from litellm import embedding, completion -# litellm.set_verbose = True +litellm.set_verbose = True -# litellm.email = "krrish@berri.ai" +litellm.use_client = True -# user_message = "Hello, how are you?" -# messages = [{ "content": user_message,"role": "user"}] +user_message = "Hello, how are you?" 
+messages = [{ "content": user_message,"role": "user"}] -# #openai call -# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) +# Test 1: On completion call +response = completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) # print(f"response: {response}") -# #bad request call -# # response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}]) + +# # Test 2: On embedding call +# response = embedding(model="text-embedding-ada-002", input=["sample text"]) +# print(f"response: {response}") + +# # Test 3: On streaming completion call +response = completion(model="replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}], stream=True) +print(f"response: {response}") \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index bc97891251..fedf960c0a 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -141,27 +141,41 @@ def install_and_import(package: str): ####### LOGGING ################### +from enum import Enum + +class CallTypes(Enum): + embedding = 'embedding' + completion = 'completion' + # Logging function -> log the exact model details + what's being sent | Non-Blocking class Logging: global supabaseClient, liteDebuggerClient - def __init__(self, model, messages, optional_params, litellm_params): + def __init__(self, model, messages, stream, call_type, litellm_call_id): + if call_type not in [item.value for item in CallTypes]: + allowed_values = ", ".join([item.value for item in CallTypes]) + raise ValueError(f"Invalid call_type {call_type}. Allowed values: {allowed_values}") self.model = model self.messages = messages + self.stream = stream + self.call_type = call_type + self.litellm_call_id = litellm_call_id + + def update_environment_variables(self, optional_params, litellm_params): self.optional_params = optional_params self.litellm_params = litellm_params self.logger_fn = litellm_params["logger_fn"] print_verbose(f"self.optional_params: {self.optional_params}") self.model_call_details = { - "model": model, - "messages": messages, + "model": self.model, + "messages": self.messages, "optional_params": self.optional_params, "litellm_params": self.litellm_params, } def pre_call(self, input, api_key, model=None, additional_args={}): try: - print_verbose(f"logging pre call for model: {self.model}") + print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}") self.model_call_details["input"] = input self.model_call_details["api_key"] = api_key self.model_call_details["additional_args"] = additional_args @@ -215,6 +229,7 @@ class Logging: litellm_params=self.model_call_details["litellm_params"], optional_params=self.model_call_details["optional_params"], print_verbose=print_verbose, + call_type=self.call_type, ) except Exception as e: print_verbose( @@ -235,7 +250,7 @@ class Logging: if capture_exception: # log this error to sentry for debugging capture_exception(e) - def post_call(self, input, api_key, original_response, additional_args={}): + def post_call(self, original_response, input=None, api_key=None, additional_args={}): # Do something here try: self.model_call_details["input"] = input @@ -262,13 +277,13 @@ class Logging: try: if callback == "lite_debugger": print_verbose("reaches litedebugger for post-call logging!") - model = self.model_call_details["model"] - messages = 
self.model_call_details["input"]
                        print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
                        liteDebuggerClient.post_call_log_event(
                            original_response=original_response,
                            litellm_call_id=self.litellm_params["litellm_call_id"],
                            print_verbose=print_verbose,
+                            call_type = self.call_type,
+                            stream = self.stream
                        )
                except:
                    print_verbose(
@@ -285,7 +300,72 @@ class Logging:
            )
            pass

-    # Add more methods as needed
+
+    def success_handler(self, result, start_time, end_time):
+        try:
+            for callback in litellm.success_callback:
+                try:
+                    if callback == "lite_debugger":
+                        print_verbose("reaches lite_debugger for logging!")
+                        print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
+                        print_verbose(f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}")
+                        liteDebuggerClient.log_event(
+                            end_user=litellm._thread_context.user,
+                            response_obj=result,
+                            start_time=start_time,
+                            end_time=end_time,
+                            litellm_call_id=self.litellm_call_id,
+                            print_verbose=print_verbose,
+                            call_type = self.call_type,
+                            stream = self.stream
+                        )
+                except Exception as e:
+                    print_verbose(
+                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {traceback.format_exc()}"
+                    )
+                    print_verbose(
+                        f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
+                    )
+                    if capture_exception:  # log this error to sentry for debugging
+                        capture_exception(e)
+        except:
+            print_verbose(
+                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
+            )
+            pass
+
+    def failure_handler(self, exception, traceback_exception, start_time, end_time):
+        try:
+            for callback in litellm.failure_callback:
+                if callback == "lite_debugger":
+                    print_verbose("reaches lite_debugger for logging!")
+                    print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
+                    result = {
+                        "model": self.model,
+                        "created": time.time(),
+                        "error": traceback_exception,
+                        "usage": {
+                            "prompt_tokens": prompt_token_calculator(
+                                self.model, messages=self.messages
+                            ),
+                            "completion_tokens": 0,
+                        },
+                    }
+                    liteDebuggerClient.log_event(
+                        end_user=litellm._thread_context.user,
+                        response_obj=result,
+                        start_time=start_time,
+                        end_time=end_time,
+                        litellm_call_id=self.litellm_call_id,
+                        print_verbose=print_verbose,
+                        call_type = self.call_type,
+                        stream = self.stream
+                    )
+            pass
+        except:
+            pass


def exception_logging(
@@ -327,7 +407,7 @@ def client(original_function):
        *args, **kwargs
    ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
        try:
-            global callback_list, add_breadcrumb, user_logger_fn
+            global callback_list, add_breadcrumb, user_logger_fn, Logging
            if (
                litellm.email is not None
                or os.getenv("LITELLM_EMAIL", None) is not None
@@ -369,12 +449,22 @@ def client(original_function):
                )
            if "logger_fn" in kwargs:
                user_logger_fn = kwargs["logger_fn"]
-            # LOG SUCCESS
+            # CRASH REPORTING TELEMETRY
            crash_reporting(*args, **kwargs)
+            # INIT LOGGER - for user-specified integrations
+            model = args[0] if len(args) > 0 else kwargs["model"]
+            call_type = original_function.__name__
+            if call_type == CallTypes.completion.value:
+                messages = args[1] if len(args) > 1 else kwargs["messages"]
+            elif call_type == CallTypes.embedding.value:
+                messages = args[1] if len(args) > 1 else kwargs["input"]
+            stream = True if "stream" in kwargs and kwargs["stream"] == True else False
+            logging_obj = Logging(model=model, messages=messages, stream=stream, litellm_call_id=kwargs["litellm_call_id"], call_type=call_type)
+            return logging_obj
        except:  # DO NOT BLOCK running the function because of this
            print_verbose(f"[Non-Blocking] {traceback.format_exc()}")
            pass
-
+
    def crash_reporting(*args, **kwargs):
        if litellm.telemetry:
            try:
@@ -397,10 +487,11 @@ def client(original_function):
    def wrapper(*args, **kwargs):
        start_time = None
        result = None
+        litellm_call_id = str(uuid.uuid4())
+        kwargs["litellm_call_id"] = litellm_call_id
+        logging_obj = function_setup(*args, **kwargs)
+        kwargs["litellm_logging_obj"] = logging_obj
        try:
-            function_setup(*args, **kwargs)
-            litellm_call_id = str(uuid.uuid4())
-            kwargs["litellm_call_id"] = litellm_call_id
            start_time = datetime.datetime.now()
            # [OPTIONAL] CHECK CACHE
            # remove this after deprecating litellm.caching
@@ -415,10 +506,13 @@ def client(original_function):
            # MODEL CALL
            result = original_function(*args, **kwargs)
+            end_time = datetime.datetime.now()
+            # LOG SUCCESS
+            logging_obj.success_handler(result, start_time, end_time)
+
            if "stream" in kwargs and kwargs["stream"] == True:
                # TODO: Add to cache for streaming
                return result
-            end_time = datetime.datetime.now()
            # [OPTIONAL] ADD TO CACHE
            if litellm.caching or litellm.caching_with_models or litellm.cache != None:  # user init a cache object
                litellm.cache.add_cache(result, *args, **kwargs)
@@ -433,6 +527,7 @@ def client(original_function):
            traceback_exception = traceback.format_exc()
            crash_reporting(*args, **kwargs, exception=traceback_exception)
            end_time = datetime.datetime.now()
+            logging_obj.failure_handler(e, traceback_exception, start_time, end_time)
            my_thread = threading.Thread(
                target=handle_failure,
                args=(e, traceback_exception, start_time, end_time, args, kwargs),
@@ -917,44 +1012,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
                        litellm_call_id=kwargs["litellm_call_id"],
                        print_verbose=print_verbose,
                    )
-                elif callback == "lite_debugger":
-                    print_verbose("reaches lite_debugger for logging!")
-                    print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
-                    model = args[0] if len(args) > 0 else kwargs["model"]
-                    messages = (
-                        args[1]
-                        if len(args) > 1
-                        else kwargs.get(
-                            "messages",
-                            [
-                                {
-                                    "role": "user",
-                                    "content": " ".join(kwargs.get("input", "")),
-                                }
-                            ],
-                        )
-                    )
-                    result = {
-                        "model": model,
-                        "created": time.time(),
-                        "error": traceback_exception,
-                        "usage": {
-                            "prompt_tokens": prompt_token_calculator(
-                                model, messages=messages
-                            ),
-                            "completion_tokens": 0,
-                        },
-                    }
-                    liteDebuggerClient.log_event(
-                        model=model,
-                        messages=messages,
-                        end_user=litellm._thread_context.user,
-                        response_obj=result,
-                        start_time=start_time,
-                        end_time=end_time,
-                        litellm_call_id=kwargs["litellm_call_id"],
-                        print_verbose=print_verbose,
-                    )
        except:
            print_verbose(
                f"Error Occurred while logging failure: {traceback.format_exc()}"
            )
@@ -1085,32 +1142,6 @@ def handle_success(args, kwargs, result, start_time, end_time):
                    litellm_call_id=kwargs["litellm_call_id"],
                    print_verbose=print_verbose,
                )
-            elif callback == "lite_debugger":
-                print_verbose("reaches lite_debugger for logging!")
-                print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
-                messages = (
-                    args[1]
-                    if len(args) > 1
-                    else kwargs.get(
-                        "messages",
-                        [
-                            {
-                                "role": "user",
-                                "content": " ".join(kwargs.get("input", "")),
-                            }
-                        ],
-                    )
-                )
-                liteDebuggerClient.log_event(
-                    model=model,
-                    messages=messages,
-                    end_user=litellm._thread_context.user,
-                    response_obj=result,
-                    start_time=start_time,
-                    end_time=end_time,
-                    litellm_call_id=kwargs["litellm_call_id"],
-                    print_verbose=print_verbose,
-                )
        except Exception as e:
            # LOGGING
            exception_logging(logger_fn=user_logger_fn, exception=e)
@@ -1486,9 +1517,10 @@ def get_secret(secret_name):
# wraps the completion stream to return the correct format for the model
# replicate/anthropic/cohere
class CustomStreamWrapper:
-    def __init__(self, completion_stream, model, custom_llm_provider=None):
+    def __init__(self, completion_stream, model, custom_llm_provider=None, logging_obj=None):
        self.model = model
        self.custom_llm_provider = custom_llm_provider
+        self.logging_obj = logging_obj
        if model in litellm.cohere_models:
            # cohere does not return an iterator, so we need to wrap it in one
            self.completion_stream = iter(completion_stream)
@@ -1497,6 +1529,10 @@ class CustomStreamWrapper:
    def __iter__(self):
        return self
+
+    def logging(self, text):
+        if self.logging_obj:
+            self.logging_obj.post_call(text)
    def handle_anthropic_chunk(self, chunk):
        str_line = chunk.decode("utf-8")  # Convert bytes to string
@@ -1586,6 +1622,8 @@ class CustomStreamWrapper:
            elif self.model in litellm.open_ai_text_completion_models:
                chunk = next(self.completion_stream)
                completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
+            # LOGGING
+            self.logging(completion_obj["content"])
            # return this for all models
            return {"choices": [{"delta": completion_obj}]}
diff --git a/pyproject.toml b/pyproject.toml
index 9f625122f5..d332b4e36a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.495"
+version = "0.1.496"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
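
# --- Illustrative sketch (not part of the patch above) ---
# A minimal, self-contained example of the pattern this diff introduces: the
# @client decorator builds one Logging object per call, stores it in
# kwargs["litellm_logging_obj"], and the wrapped function (and any stream
# wrapper) reuses it instead of constructing its own logger. Names mirror the
# diff (CallTypes, Logging, litellm_call_id, litellm_logging_obj); the bodies
# are simplified stand-ins, not litellm's actual implementation.
import datetime
import uuid
from enum import Enum

class CallTypes(Enum):
    embedding = "embedding"
    completion = "completion"

class Logging:
    def __init__(self, model, messages, stream, call_type, litellm_call_id):
        # reject call types the debugger integrations don't know about
        if call_type not in [item.value for item in CallTypes]:
            raise ValueError(f"Invalid call_type {call_type}")
        self.model, self.messages, self.stream = model, messages, stream
        self.call_type, self.litellm_call_id = call_type, litellm_call_id

    def success_handler(self, result, start_time, end_time):
        elapsed = (end_time - start_time).total_seconds()
        print(f"[{self.call_type}] {self.model} ok in {elapsed:.3f}s (stream={self.stream})")

    def failure_handler(self, exception, traceback_exception, start_time, end_time):
        print(f"[{self.call_type}] {self.model} failed: {exception}")

def client(original_function):
    def wrapper(*args, **kwargs):
        # one call id + one logging object per call, shared with the wrapped function
        kwargs["litellm_call_id"] = str(uuid.uuid4())
        logging_obj = Logging(
            model=args[0] if len(args) > 0 else kwargs["model"],
            messages=kwargs.get("messages") or kwargs.get("input"),
            stream=kwargs.get("stream", False),
            call_type=original_function.__name__,
            litellm_call_id=kwargs["litellm_call_id"],
        )
        kwargs["litellm_logging_obj"] = logging_obj
        start_time = datetime.datetime.now()
        try:
            result = original_function(*args, **kwargs)
            logging_obj.success_handler(result, start_time, datetime.datetime.now())
            return result
        except Exception as e:
            logging_obj.failure_handler(e, "traceback", start_time, datetime.datetime.now())
            raise
    return wrapper

@client
def completion(model, messages, stream=False, litellm_call_id=None, litellm_logging_obj=None):
    # a real provider call would pass litellm_logging_obj into CustomStreamWrapper when stream=True
    return {"choices": [{"message": {"content": "hi"}}]}

if __name__ == "__main__":
    print(completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hey"}]))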