(Fixes) OpenAI Streaming Token Counting + Fixes usage tracking when litellm.turn_off_message_logging=True (#8156)

* working streaming usage tracking

* fix test_async_chat_openai_stream_options

* fix await asyncio.sleep(1)

* test_async_chat_azure

* fix s3 logging

* fix get_stream_options

* fix get_stream_options

* fix streaming handler

* test_stream_token_counting_with_redaction

* fix codeql concern
Ishaan Jaff 2025-01-31 15:06:37 -08:00 committed by GitHub
parent 9f0f2b3f01
commit 2cf0daa31c
8 changed files with 268 additions and 94 deletions
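
The diff below makes LiteLLM request usage on OpenAI streams by default via `stream_options={"include_usage": True}`. For reference, a minimal sketch (not from this PR; model and prompt are illustrative) of that upstream OpenAI API feature: when `include_usage` is set, the final streamed chunk carries a `usage` object and an empty `choices` list.

    from openai import OpenAI

    client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
        stream_options={"include_usage": True},
    )

    for chunk in stream:
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")
        elif chunk.usage:  # final chunk: token counts for the whole stream
            print(
                f"\nprompt={chunk.usage.prompt_tokens} "
                f"completion={chunk.usage.completion_tokens}"
            )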


@@ -14,6 +14,7 @@ from typing import (
     Union,
     cast,
 )
+from urllib.parse import urlparse
 import httpx
 import openai
@@ -833,8 +834,9 @@ class OpenAIChatCompletion(BaseLLM):
         stream_options: Optional[dict] = None,
     ):
         data["stream"] = True
-        if stream_options is not None:
-            data["stream_options"] = stream_options
+        data.update(
+            self.get_stream_options(stream_options=stream_options, api_base=api_base)
+        )
         openai_client: OpenAI = self._get_openai_client(  # type: ignore
             is_async=False,
@@ -893,8 +895,9 @@ class OpenAIChatCompletion(BaseLLM):
     ):
         response = None
         data["stream"] = True
-        if stream_options is not None:
-            data["stream_options"] = stream_options
+        data.update(
+            self.get_stream_options(stream_options=stream_options, api_base=api_base)
+        )
         for _ in range(2):
             try:
                 openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
@@ -977,6 +980,20 @@ class OpenAIChatCompletion(BaseLLM):
                 status_code=500, message=f"{str(e)}", headers=error_headers
             )
 
+    def get_stream_options(
+        self, stream_options: Optional[dict], api_base: Optional[str]
+    ) -> dict:
+        """
+        Pass `stream_options` to the data dict for OpenAI requests
+        """
+        if stream_options is not None:
+            return {"stream_options": stream_options}
+        else:
+            # by default litellm will include usage for openai endpoints
+            if api_base is None or urlparse(api_base).hostname == "api.openai.com":
+                return {"stream_options": {"include_usage": True}}
+            return {}
+
     # Embedding
     @track_llm_api_timing()
     async def make_openai_embedding_request(
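
In effect, the new `get_stream_options` helper turns usage reporting on by default for requests going to api.openai.com (or with no `api_base` set), passes explicit caller-supplied `stream_options` through unchanged, and injects nothing for non-OpenAI endpoints, which may not understand the parameter. A standalone sketch of that decision logic (the name `resolve_stream_options` is illustrative, not LiteLLM's API):

    from typing import Optional
    from urllib.parse import urlparse

    def resolve_stream_options(
        stream_options: Optional[dict], api_base: Optional[str]
    ) -> dict:
        """Mirror of the PR's get_stream_options logic: caller-supplied
        stream_options win; otherwise usage is requested by default, but
        only when targeting api.openai.com (or when no api_base is set)."""
        if stream_options is not None:
            return {"stream_options": stream_options}
        if api_base is None or urlparse(api_base).hostname == "api.openai.com":
            return {"stream_options": {"include_usage": True}}
        return {}

    # Default OpenAI endpoint -> usage is requested on the final chunk.
    assert resolve_stream_options(None, None) == {
        "stream_options": {"include_usage": True}
    }
    # Custom/proxy endpoint -> nothing is injected.
    assert resolve_stream_options(None, "https://my-proxy.internal/v1") == {}
    # Explicit caller value always passes through untouched.
    assert resolve_stream_options({"include_usage": False}, None) == {
        "stream_options": {"include_usage": False}
    }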