fix(utils.py): return 'response_cost' in completion call

Closes https://github.com/BerriAI/litellm/issues/4335
Author: Krrish Dholakia
Date: 2024-06-26 17:55:57 -07:00
parent 151d19960e
commit f533e1da09
4 changed files with 260 additions and 64 deletions


@@ -899,6 +899,17 @@ def client(original_function):
                     model=model,
                     optional_params=getattr(logging_obj, "optional_params", {}),
                 )
+                result._hidden_params["response_cost"] = (
+                    litellm.response_cost_calculator(
+                        response_object=result,
+                        model=getattr(logging_obj, "model", ""),
+                        custom_llm_provider=getattr(
+                            logging_obj, "custom_llm_provider", None
+                        ),
+                        call_type=getattr(logging_obj, "call_type", "completion"),
+                        optional_params=getattr(logging_obj, "optional_params", {}),
+                    )
+                )
             result._response_ms = (
                 end_time - start_time
             ).total_seconds() * 1000  # return response latency in ms like openai
@@ -1292,6 +1303,17 @@ def client(original_function):
                     model=model,
                     optional_params=kwargs,
                 )
+                result._hidden_params["response_cost"] = (
+                    litellm.response_cost_calculator(
+                        response_object=result,
+                        model=getattr(logging_obj, "model", ""),
+                        custom_llm_provider=getattr(
+                            logging_obj, "custom_llm_provider", None
+                        ),
+                        call_type=getattr(logging_obj, "call_type", "completion"),
+                        optional_params=getattr(logging_obj, "optional_params", {}),
+                    )
+                )
             if (
                 isinstance(result, ModelResponse)
                 or isinstance(result, EmbeddingResponse)
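
With this change, the cost calculated for a call is attached to the returned response object under _hidden_params["response_cost"] instead of only being available to internal logging. A minimal usage sketch (the model name and message are illustrative placeholders; assumes a valid API key is configured for the chosen provider):

import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
)

# After this fix, the cost computed by litellm.response_cost_calculator
# is exposed directly on the response:
print(response._hidden_params["response_cost"])

Note that the wrapper reads model, custom_llm_provider, and call_type off the logging object via getattr with defaults, so cost calculation still runs (falling back to "completion" as the call type) when an attribute is missing.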