(Fixes) OpenAI Streaming Token Counting + Fixes usage tracking when litellm.turn_off_message_logging=True (#8156)

* working streaming usage tracking

* fix test_async_chat_openai_stream_options

* fix await asyncio.sleep(1)

* test_async_chat_azure

* fix s3 logging

* fix get_stream_options

* fix get_stream_options

* fix streaming handler

* test_stream_token_counting_with_redaction

* fix codeql concern
Ishaan Jaff 2025-01-31 15:06:37 -08:00 committed by GitHub
parent 9f0f2b3f01
commit 2cf0daa31c
8 changed files with 268 additions and 94 deletions
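
The diff below makes LiteLLM request usage on OpenAI streams by default via `stream_options={"include_usage": True}`. For reference, a minimal sketch (not from this PR; model and prompt are illustrative) of that upstream OpenAI API feature: when `include_usage` is set, the final streamed chunk carries a `usage` object and an empty `choices` list.

    from openai import OpenAI

    client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
        stream_options={"include_usage": True},
    )

    for chunk in stream:
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")
        elif chunk.usage:  # final chunk: token counts for the whole stream
            print(
                f"\nprompt={chunk.usage.prompt_tokens} "
                f"completion={chunk.usage.completion_tokens}"
            )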


@@ -14,6 +14,7 @@ from typing import (
     Union,
     cast,
 )
+from urllib.parse import urlparse
 import httpx
 import openai
@@ -833,8 +834,9 @@ class OpenAIChatCompletion(BaseLLM):
         stream_options: Optional[dict] = None,
     ):
         data["stream"] = True
-        if stream_options is not None:
-            data["stream_options"] = stream_options
+        data.update(
+            self.get_stream_options(stream_options=stream_options, api_base=api_base)
+        )
         openai_client: OpenAI = self._get_openai_client(  # type: ignore
             is_async=False,
@@ -893,8 +895,9 @@ class OpenAIChatCompletion(BaseLLM):
     ):
         response = None
         data["stream"] = True
-        if stream_options is not None:
-            data["stream_options"] = stream_options
+        data.update(
+            self.get_stream_options(stream_options=stream_options, api_base=api_base)
+        )
         for _ in range(2):
             try:
                 openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
@@ -977,6 +980,20 @@ class OpenAIChatCompletion(BaseLLM):
                 status_code=500, message=f"{str(e)}", headers=error_headers
             )
 
+    def get_stream_options(
+        self, stream_options: Optional[dict], api_base: Optional[str]
+    ) -> dict:
+        """
+        Pass `stream_options` to the data dict for OpenAI requests
+        """
+        if stream_options is not None:
+            return {"stream_options": stream_options}
+        else:
+            # by default litellm will include usage for openai endpoints
+            if api_base is None or urlparse(api_base).hostname == "api.openai.com":
+                return {"stream_options": {"include_usage": True}}
+            return {}
+
     # Embedding
     @track_llm_api_timing()
     async def make_openai_embedding_request(
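
In effect, the new `get_stream_options` helper turns usage reporting on by default for requests going to api.openai.com (or with no `api_base` set), passes explicit caller-supplied `stream_options` through unchanged, and injects nothing for non-OpenAI endpoints, which may not understand the parameter. A standalone sketch of that decision logic (the name `resolve_stream_options` is illustrative, not LiteLLM's API):

    from typing import Optional
    from urllib.parse import urlparse

    def resolve_stream_options(
        stream_options: Optional[dict], api_base: Optional[str]
    ) -> dict:
        """Mirror of the PR's get_stream_options logic: caller-supplied
        stream_options win; otherwise usage is requested by default, but
        only when targeting api.openai.com (or when no api_base is set)."""
        if stream_options is not None:
            return {"stream_options": stream_options}
        if api_base is None or urlparse(api_base).hostname == "api.openai.com":
            return {"stream_options": {"include_usage": True}}
        return {}

    # Default OpenAI endpoint -> usage is requested on the final chunk.
    assert resolve_stream_options(None, None) == {
        "stream_options": {"include_usage": True}
    }
    # Custom/proxy endpoint -> nothing is injected.
    assert resolve_stream_options(None, "https://my-proxy.internal/v1") == {}
    # Explicit caller value always passes through untouched.
    assert resolve_stream_options({"include_usage": False}, None) == {
        "stream_options": {"include_usage": False}
    }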