From 7125016d2454b9e61d8c0c76262925699769382c Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 10 Oct 2023 11:36:02 -0700
Subject: [PATCH] (docs) custom callback for tracking costs

---
 .../docs/observability/custom_callback.md          | 58 +++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/docs/my-website/docs/observability/custom_callback.md b/docs/my-website/docs/observability/custom_callback.md
index a579be002..f0a0dfa0d 100644
--- a/docs/my-website/docs/observability/custom_callback.md
+++ b/docs/my-website/docs/observability/custom_callback.md
@@ -62,6 +62,64 @@ LiteLLM will pass you the complete streaming response in the final streaming chu
     pass
 ```
 
+## Examples
+
+### Custom Callback to track costs for Streaming + Non-Streaming
+```python
+
+def track_cost_callback(
+    kwargs,                 # kwargs to completion
+    completion_response,    # response from completion
+    start_time, end_time    # start/end time
+):
+    try:
+        # init logging config
+        logging.basicConfig(
+                filename='cost.log',
+                level=logging.INFO,
+                format='%(asctime)s - %(message)s',
+                datefmt='%Y-%m-%d %H:%M:%S'
+        )
+
+        # check if it has collected an entire stream response
+        if "complete_streaming_response" in kwargs:
+            # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost
+            completion_response=kwargs["complete_streaming_response"]
+            input_text = kwargs["messages"]
+            output_text = completion_response["choices"][0]["message"]["content"]
+            response_cost = litellm.completion_cost(
+                model = kwargs["model"],
+                messages = input_text,
+                completion=output_text
+            )
+            print("streaming response_cost", response_cost)
+            logging.info(f"Model {kwargs['model']} Cost: ${response_cost:.8f}")
+
+        # for non streaming responses
+        else:
+            # we pass the completion_response obj
+            if kwargs["stream"] != True:
+                response_cost = litellm.completion_cost(completion_response=completion_response)
+                print("regular response_cost", response_cost)
+                logging.info(f"Model {completion_response.model} Cost: ${response_cost:.8f}")
+    except:
+        pass
+
+# Assign the custom callback function
+litellm.success_callback = [track_cost_callback]
+
+response = completion(
+    model="gpt-3.5-turbo",
+    messages=[
+        {
+            "role": "user",
+            "content": "Hi 👋 - i'm openai"
+        }
+    ]
+)
+
+print(response)
+```
+
 ### Custom Callback to write to Mixpanel
 ```python