fix(utils.py): return 'response_cost' in completion call

Closes https://github.com/BerriAI/litellm/issues/4335
Author: Krrish Dholakia
Date: 2024-06-26 17:55:57 -07:00
parent 151d19960e
commit f533e1da09
4 changed files with 260 additions and 64 deletions


@@ -899,6 +899,17 @@ def client(original_function):
                     model=model,
                     optional_params=getattr(logging_obj, "optional_params", {}),
                 )
+                result._hidden_params["response_cost"] = (
+                    litellm.response_cost_calculator(
+                        response_object=result,
+                        model=getattr(logging_obj, "model", ""),
+                        custom_llm_provider=getattr(
+                            logging_obj, "custom_llm_provider", None
+                        ),
+                        call_type=getattr(logging_obj, "call_type", "completion"),
+                        optional_params=getattr(logging_obj, "optional_params", {}),
+                    )
+                )
             result._response_ms = (
                 end_time - start_time
             ).total_seconds() * 1000  # return response latency in ms like openai
@@ -1292,6 +1303,17 @@ def client(original_function):
                     model=model,
                     optional_params=kwargs,
                 )
+                result._hidden_params["response_cost"] = (
+                    litellm.response_cost_calculator(
+                        response_object=result,
+                        model=getattr(logging_obj, "model", ""),
+                        custom_llm_provider=getattr(
+                            logging_obj, "custom_llm_provider", None
+                        ),
+                        call_type=getattr(logging_obj, "call_type", "completion"),
+                        optional_params=getattr(logging_obj, "optional_params", {}),
+                    )
+                )
             if (
                 isinstance(result, ModelResponse)
                 or isinstance(result, EmbeddingResponse)
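
With this change, the cost calculated for a call is attached to the returned response object under _hidden_params["response_cost"] instead of only being available to internal logging. A minimal usage sketch (the model name and message are illustrative placeholders; assumes a valid API key is configured for the chosen provider):

import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
)

# After this fix, the cost computed by litellm.response_cost_calculator
# is exposed directly on the response:
print(response._hidden_params["response_cost"])

Note that the wrapper reads model, custom_llm_provider, and call_type off the logging object via getattr with defaults, so cost calculation still runs (falling back to "completion" as the call type) when an attribute is missing.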