diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 13a9e4bdc..9bd5f90be 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -737,8 +737,8 @@ def response_cost_calculator( ) return None except Exception as e: - verbose_logger.error( - "litellm.cost_calculator.py::response_cost_calculator - Exception occurred - {}/n{}".format( + verbose_logger.warning( + "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}\n{}".format( str(e), traceback.format_exc() ) ) diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index e97be428a..a94e151f4 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -1499,6 +1499,7 @@ class Logging: self.model_call_details["traceback_exception"] = traceback_exception self.model_call_details["end_time"] = end_time self.model_call_details.setdefault("original_response", None) + self.model_call_details["response_cost"] = 0 return start_time, end_time def failure_handler( diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 18a9966be..b3888548d 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,33 +1,4 @@ model_list: - - model_name: azure-ai-mistral + - model_name: ollama-mistral litellm_params: - api_base: os.environ/AZURE_AI_MISTRAL_API_BASE - api_key: os.environ/AZURE_AI_MISTRAL_API_KEY - model: azure_ai/Mistral-large-nmefg - input_cost_per_token: 0.00001 - output_cost_per_token: 0.000004 - - model_name: azure-ai-phi - litellm_params: - api_base: os.environ/AZURE_AI_PHI_API_BASE - api_key: os.environ/AZURE_AI_PHI_API_KEY - model: azure_ai/Phi-3-medium-128k-instruct-fpmvj - - model_name: dbrx - litellm_params: - model: databricks/databricks-dbrx-instruct - api_key: os.environ/DATABRICKS_API_KEY - api_base: os.environ/DATABRICKS_API_BASE - 
input_cost_per_token: 0.00000075 - output_cost_per_token: 0.00000225 - - - -general_settings: - master_key: sk-1234 - pass_through_endpoints: - - path: "/v1/rerank" - target: "https://api.cohere.com/v1/rerank" - auth: true # 👈 Key change to use LiteLLM Auth / Keys - headers: - Authorization: "bearer os.environ/COHERE_API_KEY" - content-type: application/json - accept: application/json \ No newline at end of file + model: ollama/mistral \ No newline at end of file diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py index d3447721b..1daf1531c 100644 --- a/litellm/tests/test_completion_cost.py +++ b/litellm/tests/test_completion_cost.py @@ -42,6 +42,14 @@ class CustomLoggingHandler(CustomLogger): print(f"response_cost: {self.response_cost} ") + def log_failure_event(self, kwargs, response_obj, start_time, end_time): + print("Reaches log failure event!") + self.response_cost = kwargs["response_cost"] + + async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): + print("Reaches async log failure event!") + self.response_cost = kwargs["response_cost"] + @pytest.mark.parametrize("sync_mode", [True, False]) @pytest.mark.asyncio @@ -74,6 +82,41 @@ async def test_custom_pricing(sync_mode): assert new_handler.response_cost == 0 +@pytest.mark.parametrize( + "sync_mode", + [True, False], +) +@pytest.mark.asyncio +async def test_failure_completion_cost(sync_mode): + new_handler = CustomLoggingHandler() + litellm.callbacks = [new_handler] + if sync_mode: + try: + response = litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey!"}], + mock_response=Exception("this should trigger an error"), + ) + except Exception: + pass + time.sleep(5) + else: + try: + response = await litellm.acompletion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey!"}], + mock_response=Exception("this should trigger an error"), + ) + except Exception: + pass + await asyncio.sleep(5) + 
+ print(f"new_handler.response_cost: {new_handler.response_cost}") + assert new_handler.response_cost is not None + + assert new_handler.response_cost == 0 + + def test_custom_pricing_as_completion_cost_param(): from litellm import Choices, Message, ModelResponse from litellm.utils import Usage diff --git a/litellm/utils.py b/litellm/utils.py index aaa8f7a4a..a73873394 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -694,7 +694,7 @@ def client(original_function): kwargs["litellm_call_id"] = str(uuid.uuid4()) try: model = args[0] if len(args) > 0 else kwargs["model"] - except: + except Exception: model = None if ( call_type != CallTypes.image_generation.value