fix(utils.py): stream_options working across all providers
commit 84044c08b6
parent 150cfb68d5
5 changed files with 98 additions and 35 deletions
@@ -4946,14 +4946,23 @@ def stream_chunk_builder(
     else:
         completion_output = ""
     # # Update usage information if needed
+    prompt_tokens = 0
+    completion_tokens = 0
+    for chunk in chunks:
+        if "usage" in chunk:
+            if "prompt_tokens" in chunk["usage"]:
+                prompt_tokens += chunk["usage"].get("prompt_tokens", 0) or 0
+            if "completion_tokens" in chunk["usage"]:
+                completion_tokens += chunk["usage"].get("completion_tokens", 0) or 0
+
     try:
-        response["usage"]["prompt_tokens"] = token_counter(
+        response["usage"]["prompt_tokens"] = prompt_tokens or token_counter(
             model=model, messages=messages
         )
     except:  # don't allow this failing to block a complete streaming response from being returned
         print_verbose(f"token_counter failed, assuming prompt tokens is 0")
         response["usage"]["prompt_tokens"] = 0
-    response["usage"]["completion_tokens"] = token_counter(
+    response["usage"]["completion_tokens"] = completion_tokens or token_counter(
         model=model,
         text=completion_output,
         count_response_tokens=True,  # count_response_tokens is a Flag to tell token counter this is a response, No need to add extra tokens we do for input messages
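A minimal, self-contained sketch of the fallback logic this hunk introduces: sum whatever usage the provider reported across streamed chunks, and only fall back to counting tokens locally when nothing was reported. The chunk dicts and the fallback_token_counter callable below are hypothetical stand-ins for illustration, not litellm's actual objects or API.

    # Sketch of the usage-aggregation fallback shown in the diff above.
    # aggregate_usage and fallback_token_counter are hypothetical names.

    def aggregate_usage(chunks, fallback_token_counter):
        prompt_tokens = 0
        completion_tokens = 0
        for chunk in chunks:
            usage = chunk.get("usage") or {}
            # Providers that honor stream_options={"include_usage": True}
            # attach token counts to (usually the final) chunk.
            prompt_tokens += usage.get("prompt_tokens", 0) or 0
            completion_tokens += usage.get("completion_tokens", 0) or 0

        # Only count locally when the provider reported nothing, mirroring
        # the `prompt_tokens or token_counter(...)` pattern in the diff.
        return {
            "prompt_tokens": prompt_tokens or fallback_token_counter("prompt"),
            "completion_tokens": completion_tokens or fallback_token_counter("completion"),
        }


    # Example: a stream where only the last chunk carries usage.
    chunks = [
        {"choices": [{"delta": {"content": "Hello"}}]},
        {"choices": [{"delta": {"content": " world"}}]},
        {"choices": [], "usage": {"prompt_tokens": 12, "completion_tokens": 2}},
    ]
    print(aggregate_usage(chunks, lambda kind: 0))
    # -> {'prompt_tokens': 12, 'completion_tokens': 2}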