fix(utils.py): return openai streaming prompt caching tokens (#6051)

* fix(utils.py): return openai streaming prompt caching tokens

Closes https://github.com/BerriAI/litellm/issues/6038

* fix(main.py): fix error in finish_reason updates
Krish Dholakia 2024-10-03 22:20:13 -04:00 committed by GitHub
parent e97eaea0a3
commit 0774b5eb19
5 changed files with 91 additions and 10 deletions


@@ -7813,9 +7813,7 @@ class CustomStreamWrapper:
                 )
             elif isinstance(response_obj["usage"], BaseModel):
                 model_response.usage = litellm.Usage(
-                    prompt_tokens=response_obj["usage"].prompt_tokens,
-                    completion_tokens=response_obj["usage"].completion_tokens,
-                    total_tokens=response_obj["usage"].total_tokens,
+                    **response_obj["usage"].model_dump()
                 )
             model_response.model = self.model
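
For context, here is a minimal sketch of why the change matters. It uses plain Pydantic stand-ins rather than the real OpenAI usage object or litellm.Usage, and the nested field names (prompt_tokens_details, cached_tokens) are illustrative assumptions: copying only the three token counters drops any nested prompt-caching details, while unpacking model_dump() forwards everything the provider returned.

# Sketch only: stand-in models, not the real litellm / OpenAI classes.
from typing import Optional

from pydantic import BaseModel


class PromptTokensDetails(BaseModel):
    # Assumed shape: where a provider would report prompt-cache hits.
    cached_tokens: int = 0


class ProviderUsage(BaseModel):
    # Stand-in for the usage object on a streaming chunk.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    prompt_tokens_details: Optional[PromptTokensDetails] = None


class Usage(BaseModel):
    # Stand-in for litellm.Usage; extra fields allowed so unknown keys survive.
    model_config = {"extra": "allow"}
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0


chunk_usage = ProviderUsage(
    prompt_tokens=100,
    completion_tokens=20,
    total_tokens=120,
    prompt_tokens_details=PromptTokensDetails(cached_tokens=80),
)

# Old behavior: only the three counters are copied, so caching info is lost.
old = Usage(
    prompt_tokens=chunk_usage.prompt_tokens,
    completion_tokens=chunk_usage.completion_tokens,
    total_tokens=chunk_usage.total_tokens,
)

# New behavior: every field the provider returned is forwarded.
new = Usage(**chunk_usage.model_dump())

print("prompt_tokens_details" in old.model_dump())  # False
print(new.model_dump()["prompt_tokens_details"])    # {'cached_tokens': 80}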