Mirror of https://github.com/BerriAI/litellm.git
(Fixes) OpenAI Streaming Token Counting + fixes usage tracking when litellm.turn_off_message_logging=True (#8156)
* working streaming usage tracking
* fix test_async_chat_openai_stream_options
* fix await asyncio.sleep(1)
* test_async_chat_azure
* fix s3 logging
* fix get_stream_options
* fix get_stream_options
* fix streaming handler
* test_stream_token_counting_with_redaction
* fix codeql concern
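For context on the streaming-usage items above: OpenAI only reports token counts for a streamed completion when the request sets stream_options={"include_usage": true}, and the counts arrive on a final chunk whose choices list is empty. A minimal sketch against the OpenAI Python SDK (the model and prompt are placeholders, not from this commit):

```python
# Minimal sketch: reading usage from an OpenAI stream.
# Assumes OPENAI_API_KEY is set; model/prompt are placeholders.
from openai import OpenAI

client = OpenAI()

stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
    stream_options={"include_usage": True},  # opt in to usage reporting
)

for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
    if chunk.usage is not None:
        # Final chunk: `choices` is empty and `usage` is populated.
        print(f"\nprompt={chunk.usage.prompt_tokens} "
              f"completion={chunk.usage.completion_tokens}")
```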
This commit is contained in:
parent 38b4980018
commit ef6ab91ac2

8 changed files with 268 additions and 94 deletions
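On the turn_off_message_logging half of the title: that flag redacts message content from logging callbacks, and the title indicates the redaction could previously also drop usage tracking for streamed responses. A hedged sketch of the behavior that test_stream_token_counting_with_redaction presumably exercises (not the repo's test code; the model name and the usage-inspection step are assumptions):

```python
# Hedged sketch: usage should still be tracked with redaction enabled.
# Not the repo's test; model and helper usage are placeholders.
import litellm

litellm.turn_off_message_logging = True  # redact message content in logs

chunks = []
response = litellm.completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in response:
    chunks.append(chunk)

# stream_chunk_builder reassembles chunks into a full ModelResponse;
# its `usage` should be populated even though message logging is off.
rebuilt = litellm.stream_chunk_builder(chunks)
print(rebuilt.usage)
```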
The diff below appears to be from litellm/__init__.py: the module-level ThreadPoolExecutor is removed and replaced by an import from the shared litellm.litellm_core_utils.thread_pool_executor module. Add/remove markers were reconstructed from the hunk headers:

```diff
@@ -166,7 +166,6 @@ with resources.open_text(
 # Convert to str (if necessary)
 claude_json_str = json.dumps(json_data)
 import importlib.metadata
-from concurrent.futures import ThreadPoolExecutor
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -185,6 +184,7 @@ from typing import (
 
 from openai import OpenAIError as OriginalError
 
+from litellm.litellm_core_utils.thread_pool_executor import executor
 from litellm.llms.base_llm.audio_transcription.transformation import (
     BaseAudioTranscriptionConfig,
 )
@@ -235,10 +235,6 @@ from .types.router import LiteLLM_Params
 
 ####### ENVIRONMENT VARIABLES ####################
-# Adjust to your specific application needs / system capabilities.
-MAX_THREADS = 100
 
-# Create a ThreadPoolExecutor
-executor = ThreadPoolExecutor(max_workers=MAX_THREADS)
 sentry_sdk_instance = None
 capture_exception = None
 add_breadcrumb = None
```
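The third hunk deletes the executor setup from __init__.py while the second hunk imports executor from litellm.litellm_core_utils.thread_pool_executor, so that module plausibly now carries the deleted lines. A reconstruction inferred from this diff, not a verbatim copy of the actual file:

```python
# Plausible contents of litellm/litellm_core_utils/thread_pool_executor.py,
# inferred from the lines deleted above; not a verbatim copy of the module.
from concurrent.futures import ThreadPoolExecutor

# Adjust to your specific application needs / system capabilities.
MAX_THREADS = 100

# A single shared executor that other litellm modules can import
# without pulling in the whole package at import time.
executor = ThreadPoolExecutor(max_workers=MAX_THREADS)
```

Keeping the executor in a small leaf module lets the streaming handler (see "fix streaming handler" in the commit message) import it directly instead of reaching back into litellm/__init__.py, which would risk a circular import.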