diff --git a/litellm/utils.py b/litellm/utils.py
index a8e872bd2..820e22afc 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -11387,8 +11387,13 @@ class CustomStreamWrapper:
                 if (
                     self.stream_options
                     and self.stream_options.get("include_usage", False) == True
+                    and response_obj["usage"] is not None
                 ):
-                    model_response.usage = response_obj["usage"]
+                    model_response.usage = litellm.Usage(
+                        prompt_tokens=response_obj["usage"].prompt_tokens,
+                        completion_tokens=response_obj["usage"].completion_tokens,
+                        total_tokens=response_obj["usage"].total_tokens,
+                    )
             elif self.custom_llm_provider == "databricks":
                 response_obj = litellm.DatabricksConfig()._chunk_parser(chunk)
                 completion_obj["content"] = response_obj["text"]
@@ -11398,8 +11403,13 @@ class CustomStreamWrapper:
                 if (
                     self.stream_options
                     and self.stream_options.get("include_usage", False) == True
+                    and response_obj["usage"] is not None
                 ):
-                    model_response.usage = response_obj["usage"]
+                    model_response.usage = litellm.Usage(
+                        prompt_tokens=response_obj["usage"].prompt_tokens,
+                        completion_tokens=response_obj["usage"].completion_tokens,
+                        total_tokens=response_obj["usage"].total_tokens,
+                    )
             elif self.custom_llm_provider == "azure_text":
                 response_obj = self.handle_azure_text_completion_chunk(chunk)
                 completion_obj["content"] = response_obj["text"]
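
For context, a minimal sketch of the pattern both hunks apply: attach usage only when the caller opted in via stream_options["include_usage"] AND the parsed chunk actually carried a usage object, then copy the token counts into a fresh object rather than aliasing the provider's. The Usage dataclass and attach_usage helper below are illustrative stand-ins, not litellm's actual classes.

from dataclasses import dataclass
from types import SimpleNamespace
from typing import Optional

@dataclass
class Usage:
    # Illustrative stand-in for litellm.Usage: the three fields the patch copies.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

def attach_usage(model_response, response_obj: dict, stream_options: Optional[dict]) -> None:
    # Guard added by the patch: skip chunks whose parser returned usage=None.
    usage = response_obj.get("usage")
    if stream_options and stream_options.get("include_usage", False) and usage is not None:
        # Copy fields into a fresh Usage instead of storing the provider's
        # object directly, so the response owns an independent usage record.
        model_response.usage = Usage(
            prompt_tokens=usage.prompt_tokens,
            completion_tokens=usage.completion_tokens,
            total_tokens=usage.total_tokens,
        )

# Usage with throwaway objects (hypothetical data):
resp = SimpleNamespace(usage=None)
chunk = {"text": "hi", "usage": SimpleNamespace(prompt_tokens=3, completion_tokens=2, total_tokens=5)}
attach_usage(resp, chunk, {"include_usage": True})
assert resp.usage == Usage(3, 2, 5)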