forked from phoenix/litellm-mirror
fix(litellm_logging.py): log response_cost=0 for failed calls
Fixes https://github.com/BerriAI/litellm/issues/4604
This commit is contained in:
parent
9cc2daeec9
commit
959c627dd3
5 changed files with 49 additions and 34 deletions
|
@@ -737,8 +737,8 @@ def response_cost_calculator(
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_logger.error(
|
verbose_logger.warning(
|
||||||
"litellm.cost_calculator.py::response_cost_calculator - Exception occurred - {}/n{}".format(
|
"litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
|
||||||
str(e), traceback.format_exc()
|
str(e), traceback.format_exc()
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@@ -1499,6 +1499,7 @@ class Logging:
|
||||||
self.model_call_details["traceback_exception"] = traceback_exception
|
self.model_call_details["traceback_exception"] = traceback_exception
|
||||||
self.model_call_details["end_time"] = end_time
|
self.model_call_details["end_time"] = end_time
|
||||||
self.model_call_details.setdefault("original_response", None)
|
self.model_call_details.setdefault("original_response", None)
|
||||||
|
self.model_call_details["response_cost"] = 0
|
||||||
return start_time, end_time
|
return start_time, end_time
|
||||||
|
|
||||||
def failure_handler(
|
def failure_handler(
|
||||||
|
|
|
@@ -1,33 +1,4 @@
|
||||||
model_list:
|
model_list:
|
||||||
- model_name: azure-ai-mistral
|
- model_name: ollama-mistral
|
||||||
litellm_params:
|
litellm_params:
|
||||||
api_base: os.environ/AZURE_AI_MISTRAL_API_BASE
|
model: ollama/mistral
|
||||||
api_key: os.environ/AZURE_AI_MISTRAL_API_KEY
|
|
||||||
model: azure_ai/Mistral-large-nmefg
|
|
||||||
input_cost_per_token: 0.00001
|
|
||||||
output_cost_per_token: 0.000004
|
|
||||||
- model_name: azure-ai-phi
|
|
||||||
litellm_params:
|
|
||||||
api_base: os.environ/AZURE_AI_PHI_API_BASE
|
|
||||||
api_key: os.environ/AZURE_AI_PHI_API_KEY
|
|
||||||
model: azure_ai/Phi-3-medium-128k-instruct-fpmvj
|
|
||||||
- model_name: dbrx
|
|
||||||
litellm_params:
|
|
||||||
model: databricks/databricks-dbrx-instruct
|
|
||||||
api_key: os.environ/DATABRICKS_API_KEY
|
|
||||||
api_base: os.environ/DATABRICKS_API_BASE
|
|
||||||
input_cost_per_token: 0.00000075
|
|
||||||
output_cost_per_token: 0.00000225
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
general_settings:
|
|
||||||
master_key: sk-1234
|
|
||||||
pass_through_endpoints:
|
|
||||||
- path: "/v1/rerank"
|
|
||||||
target: "https://api.cohere.com/v1/rerank"
|
|
||||||
auth: true # 👈 Key change to use LiteLLM Auth / Keys
|
|
||||||
headers:
|
|
||||||
Authorization: "bearer os.environ/COHERE_API_KEY"
|
|
||||||
content-type: application/json
|
|
||||||
accept: application/json
|
|
|
@@ -42,6 +42,14 @@ class CustomLoggingHandler(CustomLogger):
|
||||||
|
|
||||||
print(f"response_cost: {self.response_cost} ")
|
print(f"response_cost: {self.response_cost} ")
|
||||||
|
|
||||||
|
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
|
print("Reaches log failure event!")
|
||||||
|
self.response_cost = kwargs["response_cost"]
|
||||||
|
|
||||||
|
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
|
print("Reaches async log failure event!")
|
||||||
|
self.response_cost = kwargs["response_cost"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
@@ -74,6 +82,41 @@ async def test_custom_pricing(sync_mode):
|
||||||
assert new_handler.response_cost == 0
|
assert new_handler.response_cost == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"sync_mode",
|
||||||
|
[True, False],
|
||||||
|
)
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_failure_completion_cost(sync_mode):
|
||||||
|
new_handler = CustomLoggingHandler()
|
||||||
|
litellm.callbacks = [new_handler]
|
||||||
|
if sync_mode:
|
||||||
|
try:
|
||||||
|
response = litellm.completion(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages=[{"role": "user", "content": "Hey!"}],
|
||||||
|
mock_response=Exception("this should trigger an error"),
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
time.sleep(5)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
response = await litellm.acompletion(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages=[{"role": "user", "content": "Hey!"}],
|
||||||
|
mock_response=Exception("this should trigger an error"),
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
await asyncio.sleep(5)
|
||||||
|
|
||||||
|
print(f"new_handler.response_cost: {new_handler.response_cost}")
|
||||||
|
assert new_handler.response_cost is not None
|
||||||
|
|
||||||
|
assert new_handler.response_cost == 0
|
||||||
|
|
||||||
|
|
||||||
def test_custom_pricing_as_completion_cost_param():
|
def test_custom_pricing_as_completion_cost_param():
|
||||||
from litellm import Choices, Message, ModelResponse
|
from litellm import Choices, Message, ModelResponse
|
||||||
from litellm.utils import Usage
|
from litellm.utils import Usage
|
||||||
|
|
|
@@ -694,7 +694,7 @@ def client(original_function):
|
||||||
kwargs["litellm_call_id"] = str(uuid.uuid4())
|
kwargs["litellm_call_id"] = str(uuid.uuid4())
|
||||||
try:
|
try:
|
||||||
model = args[0] if len(args) > 0 else kwargs["model"]
|
model = args[0] if len(args) > 0 else kwargs["model"]
|
||||||
except:
|
except Exception:
|
||||||
model = None
|
model = None
|
||||||
if (
|
if (
|
||||||
call_type != CallTypes.image_generation.value
|
call_type != CallTypes.image_generation.value
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue