Merge pull request #3359 from BerriAI/litellm_docs_trackin_cost

docs - update track cost with custom callbacks
2025-04-27 11:43:54 +00:00 · 2024-04-29 13:19:25 -07:00 · 2024-04-29 13:19:25 -07:00 · 96608b1f93
commit 96608b1f93
parent 1fcf57504b 5dc911cba9
1 changed files with 6 additions and 30 deletions
--- a/docs/my-website/docs/observability/custom_callback.md
+++ b/docs/my-website/docs/observability/custom_callback.md
@@ -331,49 +331,25 @@ response = litellm.completion(model="gpt-3.5-turbo", messages=messages, metadata
 ## Examples

 ### Custom Callback to track costs for Streaming + Non-Streaming
+By default, LiteLLM calculates the response cost for you and exposes it in the logging object as `kwargs["response_cost"]` on success (for both sync and async calls).
 ```python

+# Step 1. Write your custom callback function
 def track_cost_callback(
    kwargs,                 # kwargs to completion
    completion_response,    # response from completion
    start_time, end_time    # start/end time
 ):
    try:
-        # init logging config
-        logging.basicConfig(
-                filename='cost.log',
-                level=logging.INFO,
-                format='%(asctime)s - %(message)s',
-                datefmt='%Y-%m-%d %H:%M:%S'
-        )
-
-        # check if it has collected an entire stream response
-        if "complete_streaming_response" in kwargs:
-            # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost 
-            completion_response=kwargs["complete_streaming_response"]
-            input_text = kwargs["messages"]
-            output_text = completion_response["choices"][0]["message"]["content"]
-            response_cost = litellm.completion_cost(
-                model = kwargs["model"],
-                messages = input_text,
-                completion=output_text
-            )
-            print("streaming response_cost", response_cost)
-            logging.info(f"Model {kwargs['model']} Cost: ${response_cost:.8f}")
-
-        # for non streaming responses
-        else:
-            # we pass the completion_response obj
-            if kwargs["stream"] != True:
-                response_cost = litellm.completion_cost(completion_response=completion_response)
-                print("regular response_cost", response_cost)
-                logging.info(f"Model {completion_response.model} Cost: ${response_cost:.8f}")
+        response_cost = kwargs["response_cost"] # litellm calculates response cost for you
+        print("regular response_cost", response_cost)
    except:
        pass

-# Assign the custom callback function
+# Step 2. Assign the custom callback function
 litellm.success_callback = [track_cost_callback]

+# Step 3. Make litellm.completion call
 response = completion(
    model="gpt-3.5-turbo",
    messages=[