diff --git a/litellm/__init__.py b/litellm/__init__.py
index 03114b78e..b0099202e 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -269,6 +269,7 @@ from .exceptions import (
     ServiceUnavailableError,
     OpenAIError,
     ContextWindowExceededError,
+    BudgetExceededError
 )
 from .budget_manager import BudgetManager
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 9d7f4f02b..8b54c3254 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 3b91b0fb3..fcec8b4e6 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index d323a9eae..11ad024d4 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_litellm_max_budget.py b/litellm/tests/test_litellm_max_budget.py
index 15ca48efe..0e933c604 100644
--- a/litellm/tests/test_litellm_max_budget.py
+++ b/litellm/tests/test_litellm_max_budget.py
@@ -1,6 +1,6 @@
-#### What this tests ####
-# This tests calling litellm.max_budget by making back-to-back gpt-4 calls
-# commenting out this test for circle ci, as it causes other tests to fail, since litellm.max_budget would impact other litellm imports
+# #### What this tests ####
+# # This tests calling litellm.max_budget by making back-to-back gpt-4 calls
+# # commenting out this test for circle ci, as it causes other tests to fail, since litellm.max_budget would impact other litellm imports
 # import sys, os, json
 # import traceback
 # import pytest
@@ -9,13 +9,23 @@
 #     0, os.path.abspath("../..")
 # )  # Adds the parent directory to the system path
 # import litellm
-# litellm.set_verbose = True
-# from litellm import completion
+# # litellm.set_verbose = True
+# from litellm import completion, BudgetExceededError
 
-# litellm.max_budget = 0.001 # sets a max budget of $0.001
+# def test_max_budget():
+#     try:
+#         litellm.max_budget = 0.001 # sets a max budget of $0.001
 
+#         messages = [{"role": "user", "content": "Hey, how's it going"}]
+#         response = completion(model="gpt-4", messages=messages, stream=True)
+#         for chunk in response:
+#             continue
+#         print(litellm._current_cost)
+#         completion(model="gpt-4", messages=messages, stream=True)
+#         litellm.max_budget = float('inf')
+#     except BudgetExceededError as e:
+#         pass
+#     except Exception as e:
+#         pytest.fail(f"An error occured: {str(e)}")
 
-# messages = [{"role": "user", "content": "Hey, how's it going"}]
-# completion(model="gpt-4", messages=messages)
-# completion(model="gpt-4", messages=messages)
-# print(litellm._current_cost)
diff --git a/litellm/utils.py b/litellm/utils.py
index 69f53a877..7df4330c4 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -180,6 +180,10 @@ class Logging:
         # Log the exact input to the LLM API
         print_verbose(f"Logging Details Pre-API Call for call id {self.litellm_call_id}")
         try:
+            if start_time is None:
+                start_time = self.start_time
+            if end_time is None:
+                end_time = datetime.datetime.now()
             # print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}")
             self.model_call_details["input"] = input
             self.model_call_details["api_key"] = api_key
@@ -202,6 +206,11 @@ class Logging:
                 f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
             )
 
+        if litellm.max_budget and self.stream:
+            time_diff = (end_time - start_time).total_seconds()
+            float_diff = float(time_diff)
+            litellm._current_cost += litellm.completion_cost(model=self.model, prompt="".join(message["content"] for message in self.messages), completion="", total_time=float_diff)
+
         # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
         for callback in litellm.input_callback:
             try:
@@ -314,6 +323,12 @@ class Logging:
             if end_time is None:
                 end_time = datetime.datetime.now()
             print_verbose(f"success callbacks: {litellm.success_callback}")
+
+            if litellm.max_budget and self.stream:
+                time_diff = (end_time - start_time).total_seconds()
+                float_diff = float(time_diff)
+                litellm._current_cost += litellm.completion_cost(model=self.model, prompt="", completion=result["content"], total_time=float_diff)
+
             for callback in litellm.success_callback:
                 try:
                     if callback == "lite_debugger":
@@ -574,10 +589,6 @@ def client(original_function):
 
             if litellm.caching or litellm.caching_with_models or litellm.cache != None: # user init a cache object
                 litellm.cache.add_cache(result, *args, **kwargs)
-            # [OPTIONAL] UPDATE BUDGET
-            if litellm.max_budget:
-                litellm._current_cost += litellm.completion_cost(completion_response=result)
-
             # [OPTIONAL] Return LiteLLM call_id
             if litellm.use_client == True:
                 result['litellm_call_id'] = litellm_call_id
@@ -2383,7 +2394,6 @@ class CustomStreamWrapper:
 
     def handle_cohere_chunk(self, chunk):
         chunk = chunk.decode("utf-8")
-        print(f"cohere chunk: {chunk}")
         data_json = json.loads(chunk)
         try:
             print(f"data json: {data_json}")
@@ -2474,7 +2484,8 @@ class CustomStreamWrapper:
                 completion_obj["content"] = self.handle_cohere_chunk(chunk)
             else: # openai chat/azure models
                 chunk = next(self.completion_stream)
-                return chunk # open ai returns finish_reason, we should just return the openai chunk
+                completion_obj["content"] = chunk["choices"][0]["delta"]["content"]
+                # return chunk # open ai returns finish_reason, we should just return the openai chunk
                 #completion_obj["content"] = self.handle_openai_chat_completion_chunk(chunk)
 
             # LOGGING
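
Taken together, this patch exports BudgetExceededError at the package level, moves budget accounting for streaming calls into the Logging pre-call and success handlers (costing the prompt side and the completion side separately, and dropping the old post-call budget update in client()), and makes CustomStreamWrapper yield delta content for OpenAI/Azure chunks instead of returning the raw chunk. A minimal usage sketch, mirroring the commented-out test above and based only on the APIs touched in this diff (an OpenAI key is assumed to be configured, and where exactly BudgetExceededError is raised is an assumption):

import litellm
from litellm import completion, BudgetExceededError

litellm.max_budget = 0.001  # cap cumulative spend at $0.001 across calls

messages = [{"role": "user", "content": "Hey, how's it going"}]
try:
    # streaming call: the Logging hooks accumulate cost into litellm._current_cost
    for chunk in completion(model="gpt-4", messages=messages, stream=True):
        pass
    # a second call is expected to trip the budget once _current_cost exceeds max_budget
    completion(model="gpt-4", messages=messages, stream=True)
except BudgetExceededError:
    print(f"budget exceeded, current cost: {litellm._current_cost}")
finally:
    litellm.max_budget = float("inf")  # reset so other imports/tests are not affected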