forked from phoenix/litellm-mirror
fix(utils.py): ensure consistent cost calc b/w returned header and logged object
This commit is contained in:
parent
f51f7750c0
commit
8e9117f701
3 changed files with 20 additions and 26 deletions
|
@@ -837,7 +837,7 @@ def client(original_function):
                 and kwargs.get("atranscription", False) != True
             ):  # allow users to control returning cached responses from the completion function
                 # checking cache
-                print_verbose(f"INSIDE CHECKING CACHE")
+                print_verbose("INSIDE CHECKING CACHE")
                 if (
                     litellm.cache is not None
                     and str(original_function.__name__)
@@ -965,10 +965,10 @@ def client(original_function):
             # MODEL CALL
             result = original_function(*args, **kwargs)
             end_time = datetime.datetime.now()
-            if "stream" in kwargs and kwargs["stream"] == True:
+            if "stream" in kwargs and kwargs["stream"] is True:
                 if (
                     "complete_response" in kwargs
-                    and kwargs["complete_response"] == True
+                    and kwargs["complete_response"] is True
                 ):
                     chunks = []
                     for idx, chunk in enumerate(result):
@@ -978,15 +978,15 @@ def client(original_function):
                     )
                 else:
                     return result
-            elif "acompletion" in kwargs and kwargs["acompletion"] == True:
+            elif "acompletion" in kwargs and kwargs["acompletion"] is True:
                 return result
-            elif "aembedding" in kwargs and kwargs["aembedding"] == True:
+            elif "aembedding" in kwargs and kwargs["aembedding"] is True:
                 return result
-            elif "aimg_generation" in kwargs and kwargs["aimg_generation"] == True:
+            elif "aimg_generation" in kwargs and kwargs["aimg_generation"] is True:
                 return result
-            elif "atranscription" in kwargs and kwargs["atranscription"] == True:
+            elif "atranscription" in kwargs and kwargs["atranscription"] is True:
                 return result
-            elif "aspeech" in kwargs and kwargs["aspeech"] == True:
+            elif "aspeech" in kwargs and kwargs["aspeech"] is True:
                 return result

             ### POST-CALL RULES ###
@@ -1005,7 +1005,7 @@ def client(original_function):
                 litellm.cache.add_cache(result, *args, **kwargs)

             # LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated
-            verbose_logger.info(f"Wrapper: Completed Call, calling success_handler")
+            verbose_logger.info("Wrapper: Completed Call, calling success_handler")
             threading.Thread(
                 target=logging_obj.success_handler, args=(result, start_time, end_time)
             ).start()
@@ -1019,15 +1019,7 @@ def client(original_function):
                         optional_params=getattr(logging_obj, "optional_params", {}),
                     )
                 result._hidden_params["response_cost"] = (
-                    litellm.response_cost_calculator(
-                        response_object=result,
-                        model=getattr(logging_obj, "model", ""),
-                        custom_llm_provider=getattr(
-                            logging_obj, "custom_llm_provider", None
-                        ),
-                        call_type=getattr(logging_obj, "call_type", "completion"),
-                        optional_params=getattr(logging_obj, "optional_params", {}),
-                    )
+                    logging_obj._response_cost_calculator(result=result)
                 )
                 result._response_ms = (
                     end_time - start_time
|
Loading…
Add table
Add a link
Reference in a new issue