Merge pull request #3359 from BerriAI/litellm_docs_trackin_cost

docs - update track cost with custom callbacks
2025-04-27 19:54:13 +00:00 · 2024-04-29 13:19:25 -07:00 · 2024-04-29 13:19:25 -07:00 · 96608b1f93
commit 96608b1f93
parent 1fcf57504b 5dc911cba9
1 changed files with 6 additions and 30 deletions
--- a/docs/my-website/docs/observability/custom_callback.md
+++ b/docs/my-website/docs/observability/custom_callback.md
@@ -331,49 +331,25 @@ response = litellm.completion(model="gpt-3.5-turbo", messages=messages, metadata
 ## Examples
 ### Custom Callback to track costs for Streaming + Non-Streaming
 By default, litellm calculates the response cost for you and exposes it in the logging object as `kwargs["response_cost"]` on success, for both sync and async calls.
 ```python
 # Step 1. Write your custom callback function
 def track_cost_callback(
    kwargs,                 # kwargs to completion
    completion_response,    # response from completion
    start_time, end_time    # start/end time
 ):
    try:
-        # init logging config
+        response_cost = kwargs["response_cost"] # litellm calculates response cost for you
-        logging.basicConfig(
+        print("regular response_cost", response_cost)
                filename='cost.log',
                level=logging.INFO,
                format='%(asctime)s - %(message)s',
                datefmt='%Y-%m-%d %H:%M:%S'
        )
        # check if it has collected an entire stream response
        if "complete_streaming_response" in kwargs:
            # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost 
            completion_response=kwargs["complete_streaming_response"]
            input_text = kwargs["messages"]
            output_text = completion_response["choices"][0]["message"]["content"]
            response_cost = litellm.completion_cost(
                model = kwargs["model"],
                messages = input_text,
                completion=output_text
            )
            print("streaming response_cost", response_cost)
            logging.info(f"Model {kwargs['model']} Cost: ${response_cost:.8f}")
        # for non streaming responses
        else:
            # we pass the completion_response obj
            if kwargs["stream"] != True:
                response_cost = litellm.completion_cost(completion_response=completion_response)
                print("regular response_cost", response_cost)
                logging.info(f"Model {completion_response.model} Cost: ${response_cost:.8f}")
    except:
        pass
-# Assign the custom callback function
+# Step 2. Assign the custom callback function
 litellm.success_callback = [track_cost_callback]
 # Step 3. Make litellm.completion call
 response = completion(
    model="gpt-3.5-turbo",
    messages=[