forked from phoenix/litellm-mirror
(docs) add how to track costs for streaming responses
parent
3a8c8f56d6
commit
0a83d1a924
2 changed files with 87 additions and 0 deletions
@@ -349,6 +349,49 @@ print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}")
Cost for completion call with gpt-3.5-turbo: $0.0000775000
```

### Track Costs, Usage, Latency for streaming

Use a callback function to track costs, usage, and latency for streaming responses - more info on custom callbacks: https://docs.litellm.ai/docs/observability/custom_callback

```python
import litellm
from litellm import completion

# track_cost_callback
def track_cost_callback(
    kwargs,               # kwargs to completion
    completion_response,  # response from completion
    start_time, end_time  # start/end time
):
    try:
        # check if litellm has collected an entire stream response
        if "complete_streaming_response" in kwargs:
            # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost
            completion_response = kwargs["complete_streaming_response"]
            input_text = kwargs["messages"]
            output_text = completion_response["choices"][0]["message"]["content"]
            response_cost = litellm.completion_cost(
                model=kwargs["model"],
                messages=input_text,
                completion=output_text
            )
            print("streaming response_cost", response_cost)
    except Exception:
        pass

# set callback
litellm.success_callback = [track_cost_callback] # set custom callback function

# litellm.completion() call
response = completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "Hi 👋 - i'm openai"
        }
    ],
    stream=True
)
```
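
The success callback only fires after the stream finishes, so the generator returned by `completion(stream=True)` has to be consumed before `complete_streaming_response` exists. A minimal sketch of draining it (the loop body is just an illustration of where you would handle chunks, not part of litellm's API):

```python
# drain the stream - track_cost_callback fires after the last chunk,
# once litellm has assembled "complete_streaming_response"
for chunk in response:
    pass  # or print/process each chunk as it arrives
```

Latency can be read the same way inside the callback, e.g. `(end_time - start_time).total_seconds()`, assuming the `datetime` values litellm passes to callbacks.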
Need a dedicated key? Email us @ krrish@berri.ai