(docs) add how to track costs for streaming responses

This commit is contained in:
ishaan-jaff 2023-10-18 17:42:54 -07:00
parent 3a8c8f56d6
commit 0a83d1a924
2 changed files with 87 additions and 0 deletions

View file

@ -327,6 +327,50 @@ print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}")
Cost for completion call with gpt-3.5-turbo: $0.0000775000
```
### Track Costs, Usage, Latency for streaming
Use a callback function for this - more info on custom callbacks: https://docs.litellm.ai/docs/observability/custom_callback
```python
import litellm
# track_cost_callback
def track_cost_callback(
    kwargs,               # kwargs passed to the completion call
    completion_response,  # response object from completion
    start_time, end_time  # start/end time of the call
):
    """Success callback: compute and print the cost of a finished streaming call.

    litellm populates kwargs["complete_streaming_response"] once the full
    stream has been collected; until then this callback does nothing.
    Best-effort: any failure is swallowed so cost tracking never breaks
    the completion call itself.
    """
    try:
        # only act once litellm has assembled the entire stream
        if "complete_streaming_response" in kwargs:
            # for streaming cost we pass the input "messages" and the
            # reconstructed output text to litellm.completion_cost
            completion_response = kwargs["complete_streaming_response"]
            input_text = kwargs["messages"]
            output_text = completion_response["choices"][0]["message"]["content"]
            response_cost = litellm.completion_cost(
                model=kwargs["model"],
                messages=input_text,
                completion=output_text,
            )
            print("streaming response_cost", response_cost)
    except Exception:
        # deliberately best-effort, but never use a bare `except:` —
        # it would also trap KeyboardInterrupt/SystemExit
        pass
# set callback: litellm invokes each success_callback after a successful call
litellm.success_callback = [track_cost_callback]  # set custom callback function

# litellm.completion() call — qualified with the module name, since only
# `import litellm` is in scope above (a bare `completion(...)` would raise
# NameError unless you also do `from litellm import completion`)
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "Hi 👋 - i'm openai"
        }
    ],
    stream=True
)
```
Need a dedicated key? Email us @ krrish@berri.ai