From b515d4f441ce8953bb6b420671ffbc2824f4eb0d Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 26 Jul 2024 10:51:17 -0700 Subject: [PATCH] docs(stream.md): add streaming token usage info to docs Closes https://github.com/BerriAI/litellm/issues/4904 --- docs/my-website/src/pages/stream.md | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/my-website/src/pages/stream.md b/docs/my-website/src/pages/stream.md index 5e8cc32ca..a524f4ba6 100644 --- a/docs/my-website/src/pages/stream.md +++ b/docs/my-website/src/pages/stream.md @@ -30,4 +30,48 @@ async def test_get_response(): response = asyncio.run(test_get_response()) print(response) +``` + +## Streaming Token Usage + +Supported across all providers. Works the same as OpenAI. + +`stream_options={"include_usage": True}` + +If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value. + +### SDK +```python +from litellm import completion +import os + +os.environ["OPENAI_API_KEY"] = "" + +response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey, how's it going?"}], stream=True, stream_options={"include_usage": True}) +for chunk in response: + print(chunk['choices'][0]['delta']) +``` + +### PROXY + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $OPENAI_API_KEY" \ -d '{ + "model": "gpt-4o", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Hello!" + } + ], + "stream": true, + "stream_options": {"include_usage": true} + }' + ``` \ No newline at end of file