diff --git a/litellm/main.py b/litellm/main.py
index 2a7759e8a5..37ae125b99 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -5022,10 +5022,9 @@ def stream_chunk_builder(
     for chunk in chunks:
         if "usage" in chunk:
             if "prompt_tokens" in chunk["usage"]:
-                prompt_tokens += chunk["usage"].get("prompt_tokens", 0) or 0
+                prompt_tokens = chunk["usage"].get("prompt_tokens", 0) or 0
             if "completion_tokens" in chunk["usage"]:
-                completion_tokens += chunk["usage"].get("completion_tokens", 0) or 0
-
+                completion_tokens = chunk["usage"].get("completion_tokens", 0) or 0
     try:
         response["usage"]["prompt_tokens"] = prompt_tokens or token_counter(
             model=model, messages=messages
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 7f4b86ec40..a7a5c8bf1a 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -2,11 +2,8 @@ model_list:
   - model_name: "*"
     litellm_params:
       model: "openai/*"
-      mock_response: "Hello world!"
-
 litellm_settings:
   success_callback: ["langfuse"]
-  failure_callback: ["langfuse"]
 
 general_settings:
   alerting: ["slack"]
diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py
index 001ae07e09..342b070ae7 100644
--- a/litellm/tests/test_stream_chunk_builder.py
+++ b/litellm/tests/test_stream_chunk_builder.py
@@ -1,15 +1,22 @@
-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback
+
 import pytest
 
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-from litellm import completion, stream_chunk_builder
-import litellm
-import os, dotenv
-from openai import OpenAI
+import os
+
+import dotenv
 import pytest
+from openai import OpenAI
+
+import litellm
+from litellm import completion, stream_chunk_builder
 
 dotenv.load_dotenv()
 
@@ -147,3 +154,45 @@ def test_stream_chunk_builder_litellm_tool_call_regular_message():
 
 
 # test_stream_chunk_builder_litellm_tool_call_regular_message()
+
+
+def test_stream_chunk_builder_litellm_usage_chunks():
+    """
+    Checks if stream_chunk_builder is able to correctly rebuild with given metadata from streaming chunks
+    """
+    messages = [
+        {"role": "user", "content": "Tell me the funniest joke you know."},
+        {
+            "role": "assistant",
+            "content": "Why did the chicken cross the road?\nYou will not guess this one I bet\n",
+        },
+        {"role": "user", "content": "I do not know, why?"},
+        {"role": "assistant", "content": "uhhhh\n\n\nhmmmm.....\nthinking....\n"},
+        {"role": "user", "content": "\nI am waiting...\n\n...\n"},
+    ]
+    # make a regular gemini call
+    response = completion(
+        model="gemini/gemini-1.5-flash",
+        messages=messages,
+    )
+
+    usage: litellm.Usage = response.usage
+
+    gemini_pt = usage.prompt_tokens
+
+    # make a streaming gemini call
+    response = completion(
+        model="gemini/gemini-1.5-flash",
+        messages=messages,
+        stream=True,
+        complete_response=True,
+        stream_options={"include_usage": True},
+    )
+
+    usage: litellm.Usage = response.usage
+
+    stream_rebuilt_pt = usage.prompt_tokens
+
+    # assert prompt tokens are the same
+
+    assert gemini_pt == stream_rebuilt_pt
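
For reference, a minimal standalone sketch of the aggregation behavior the main.py hunk switches to (this is not litellm code; the rebuild_usage helper and the sample chunks are hypothetical). The change from "+=" to "=" implies that a usage-bearing stream chunk already reports the request's token totals, so summing across multiple usage chunks would double-count; keeping the latest reported value avoids that.

# Hypothetical helper mirroring the fixed logic in stream_chunk_builder:
# treat each usage block as the request's totals and keep the latest value
# instead of accumulating across chunks.
def rebuild_usage(chunks):
    prompt_tokens = 0
    completion_tokens = 0
    for chunk in chunks:
        usage = chunk.get("usage") or {}
        if "prompt_tokens" in usage:
            prompt_tokens = usage.get("prompt_tokens", 0) or 0  # assign, not +=
        if "completion_tokens" in usage:
            completion_tokens = usage.get("completion_tokens", 0) or 0
    return {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens}


# Example: two chunks repeating the same prompt total would previously be
# double-counted; with assignment the rebuilt usage matches the provider's report.
chunks = [
    {"usage": {"prompt_tokens": 61, "completion_tokens": 5}},
    {"usage": {"prompt_tokens": 61, "completion_tokens": 12}},
]
assert rebuild_usage(chunks) == {"prompt_tokens": 61, "completion_tokens": 12}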