diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index f3bf6edfeb..5702cb34f2 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 6b756a49ba..cb633a81e1 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/integrations/litedebugger.py b/litellm/integrations/litedebugger.py
index 032f1b6550..ef33c7edd7 100644
--- a/litellm/integrations/litedebugger.py
+++ b/litellm/integrations/litedebugger.py
@@ -184,21 +184,22 @@ class LiteDebugger:
                     data=json.dumps(litellm_data_obj),
                 )
             elif call_type == "completion" and stream == True:
-                litellm_data_obj = {
-                    "response_time": response_time,
-                    "total_cost": total_cost,
-                    "response": "streamed response",
-                    "litellm_call_id": litellm_call_id,
-                    "status": "success",
-                }
-                print_verbose(
-                    f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
-                )
-                response = requests.post(
-                    url=self.api_url,
-                    headers={"content-type": "application/json"},
-                    data=json.dumps(litellm_data_obj),
-                )
+                if len(response_obj["content"]) > 0:  # don't log the empty strings
+                    litellm_data_obj = {
+                        "response_time": response_time,
+                        "total_cost": total_cost,
+                        "response": response_obj["content"],
+                        "litellm_call_id": litellm_call_id,
+                        "status": "success",
+                    }
+                    print_verbose(
+                        f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
+                    )
+                    response = requests.post(
+                        url=self.api_url,
+                        headers={"content-type": "application/json"},
+                        data=json.dumps(litellm_data_obj),
+                    )
             elif "error" in response_obj:
                 if "Unable to map your input to a model." in response_obj["error"]:
in response_obj["error"]: total_cost = 0 diff --git a/litellm/tests/test_litedebugger_integration.py b/litellm/tests/test_litedebugger_integration.py index afa84e10c4..c9a569760c 100644 --- a/litellm/tests/test_litedebugger_integration.py +++ b/litellm/tests/test_litedebugger_integration.py @@ -36,8 +36,8 @@ litellm.set_verbose = True score = 0 split_per_model = { - "gpt-4": 0.7, - "claude-instant-1.2": 0.3 + "gpt-4": 0, + "claude-instant-1.2": 1 } @@ -81,26 +81,31 @@ try: raise Exception("LiteLLMDebugger: post-api call not logged!") if "LiteDebugger: Success/Failure Call Logging" not in output: raise Exception("LiteLLMDebugger: success/failure call not logged!") -except: - pass +except Exception as e: + pytest.fail(f"Error occurred: {e}") -# Test 3: On streaming completion call - setting client to true +Test 3: On streaming completion call - setting client to true try: # Redirect stdout old_stdout = sys.stdout sys.stdout = new_stdout = io.StringIO() - response = completion_with_split_tests(models=split_per_model, messages=messages, stream=True, use_client=True, id="6d383c99-488d-481d-aa1b-1f94935cec44") - + response = completion_with_split_tests(models=split_per_model, messages=messages, stream=True, use_client=True, override_client=True, id="6d383c99-488d-481d-aa1b-1f94935cec44") + for data in response: + print(data) # Restore stdout sys.stdout = old_stdout output = new_stdout.getvalue().strip() + print(output) + print(f"response: {response}") + if "LiteDebugger: Pre-API Call Logging" not in output: raise Exception("LiteLLMDebugger: pre-api call not logged!") if "LiteDebugger: Post-API Call Logging" not in output: raise Exception("LiteLLMDebugger: post-api call not logged!") if "LiteDebugger: Success/Failure Call Logging" not in output: raise Exception("LiteLLMDebugger: success/failure call not logged!") -except: - pass +except Exception as e: + pytest.fail(f"Error occurred: {e}") + diff --git a/litellm/utils.py b/litellm/utils.py index 4add7fe703..12260de021 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -157,13 +157,14 @@ class CallTypes(Enum): class Logging: global supabaseClient, liteDebuggerClient - def __init__(self, model, messages, stream, call_type, litellm_call_id, function_id): + def __init__(self, model, messages, stream, call_type, start_time, litellm_call_id, function_id): if call_type not in [item.value for item in CallTypes]: allowed_values = ", ".join([item.value for item in CallTypes]) raise ValueError(f"Invalid call_type {call_type}. 
Allowed values: {allowed_values}") self.model = model self.messages = messages self.stream = stream + self.start_time = start_time # log the call start time self.call_type = call_type self.litellm_call_id = litellm_call_id self.function_id = function_id @@ -330,11 +331,15 @@ class Logging: pass - def success_handler(self, result, start_time, end_time): + def success_handler(self, result, start_time=None, end_time=None): print_verbose( f"Logging Details LiteLLM-Success Call" ) try: + if start_time is None: + start_time = self.start_time + if end_time is None: + end_time = datetime.datetime.now() for callback in litellm.success_callback: try: if callback == "lite_debugger": @@ -366,11 +371,16 @@ class Logging: ) pass - def failure_handler(self, exception, traceback_exception, start_time, end_time): + def failure_handler(self, exception, traceback_exception, start_time=None, end_time=None): print_verbose( f"Logging Details LiteLLM-Failure Call" ) try: + if start_time is None: + start_time = self.start_time + if end_time is None: + end_time = datetime.datetime.now() + for callback in litellm.failure_callback: try: if callback == "lite_debugger": @@ -451,7 +461,7 @@ def client(original_function): global liteDebuggerClient, get_all_keys def function_setup( - *args, **kwargs + start_time, *args, **kwargs ): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc. try: global callback_list, add_breadcrumb, user_logger_fn, Logging @@ -495,7 +505,7 @@ def client(original_function): elif call_type == CallTypes.embedding.value: messages = args[1] if len(args) > 1 else kwargs["input"] stream = True if "stream" in kwargs and kwargs["stream"] == True else False - logging_obj = Logging(model=model, messages=messages, stream=stream, litellm_call_id=kwargs["litellm_call_id"], function_id=function_id, call_type=call_type) + logging_obj = Logging(model=model, messages=messages, stream=stream, litellm_call_id=kwargs["litellm_call_id"], function_id=function_id, call_type=call_type, start_time=start_time) return logging_obj except: # DO NOT BLOCK running the function because of this print_verbose(f"[Non-Blocking] {traceback.format_exc()}; args - {args}; kwargs - {kwargs}") @@ -521,14 +531,13 @@ def client(original_function): pass def wrapper(*args, **kwargs): - start_time = None + start_time = datetime.datetime.now() result = None litellm_call_id = str(uuid.uuid4()) kwargs["litellm_call_id"] = litellm_call_id - logging_obj = function_setup(*args, **kwargs) + logging_obj = function_setup(start_time, *args, **kwargs) kwargs["litellm_logging_obj"] = logging_obj try: - start_time = datetime.datetime.now() # [OPTIONAL] CHECK CACHE # remove this after deprecating litellm.caching if (litellm.caching or litellm.caching_with_models) and litellm.cache is None: @@ -543,12 +552,11 @@ def client(original_function): # MODEL CALL result = original_function(*args, **kwargs) end_time = datetime.datetime.now() - # LOG SUCCESS - logging_obj.success_handler(result, start_time, end_time) - if "stream" in kwargs and kwargs["stream"] == True: # TODO: Add to cache for streaming return result + + # [OPTIONAL] ADD TO CACHE if litellm.caching or litellm.caching_with_models or litellm.cache != None: # user init a cache object litellm.cache.add_cache(result, *args, **kwargs) @@ -557,7 +565,8 @@ def client(original_function): if litellm.use_client == True: result['litellm_call_id'] = litellm_call_id - # LOG SUCCESS + # LOG SUCCESS - handle streaming success logging in the _next_ object, remove 
+            threading.Thread(target=logging_obj.success_handler, args=(result, start_time, end_time)).start()
             my_thread = threading.Thread(
                 target=handle_success, args=(args, kwargs, result, start_time, end_time)
             )  # don't interrupt execution of main thread
@@ -568,7 +577,8 @@ def client(original_function):
             traceback_exception = traceback.format_exc()
             crash_reporting(*args, **kwargs, exception=traceback_exception)
             end_time = datetime.datetime.now()
-            logging_obj.failure_handler(e, traceback_exception, start_time, end_time)
+            # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated
+            threading.Thread(target=logging_obj.failure_handler, args=(e, traceback_exception, start_time, end_time)).start()
             my_thread = threading.Thread(
                 target=handle_failure,
                 args=(e, traceback_exception, start_time, end_time, args, kwargs),
@@ -1833,7 +1843,7 @@ class CustomStreamWrapper:
                 completion_obj["content"] = self.handle_openai_chat_completion_chunk(chunk)
 
             # LOGGING
-            # self.logging_obj.post_call(completion_obj["content"])
+            threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
             # return this for all models
             return {"choices": [{"delta": completion_obj}]}
         except:
@@ -1933,7 +1943,7 @@ def get_model_split_test(models, completion_call_id):
     )
 
 
-def completion_with_split_tests(models={}, messages=[], use_client=False, **kwargs):
+def completion_with_split_tests(models={}, messages=[], use_client=False, override_client=False, **kwargs):
     """
     Example Usage:
 
@@ -1945,7 +1955,7 @@ def completion_with_split_tests(models={}, messages=[], use_client=False, **kwar
     completion_with_split_tests(models=models, messages=messages)
     """
     import random
-    if use_client:
+    if use_client and not override_client:
        if "id" not in kwargs or kwargs["id"] is None:
            raise ValueError("Please tag this completion call, if you'd like to update it's split test values through the UI. - eg. `completion_with_split_tests(.., id=1234)`.")
        # get the most recent model split list from server