(Fixes) OpenAI Streaming Token Counting + Fixes usage tracking when litellm.turn_off_message_logging=True (#8156)

* working streaming usage tracking (a usage sketch follows this list)

* fix test_async_chat_openai_stream_options

* fix await asyncio.sleep(1)

* test_async_chat_azure

* fix s3 logging

* fix get_stream_options

* fix get_stream_options

* fix streaming handler

* test_stream_token_counting_with_redaction (see the sketch after the diff)

* fix codeql concern
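
Since the bullets above reference get_stream_options and test_async_chat_openai_stream_options, here is a minimal sketch of the behavior under test, assuming litellm forwards OpenAI's stream_options={"include_usage": True} so the final streamed chunk carries a usage block; the model name and printout are illustrative, not taken from the commit:

import asyncio

import litellm


async def main():
    response = await litellm.acompletion(
        model="gpt-4o-mini",  # illustrative model, not from the commit
        messages=[{"role": "user", "content": "Hi"}],
        stream=True,
        stream_options={"include_usage": True},
    )
    usage = None
    async for chunk in response:
        # With include_usage set, the last chunk should carry token counts.
        if getattr(chunk, "usage", None):
            usage = chunk.usage
    print(usage)  # prompt_tokens / completion_tokens / total_tokens


asyncio.run(main())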
Ishaan Jaff 2025-01-31 15:06:37 -08:00 committed by GitHub
parent 9f0f2b3f01
commit 2cf0daa31c
8 changed files with 268 additions and 94 deletions

@@ -381,7 +381,7 @@ class CompletionCustomHandler(
 # Simple Azure OpenAI call
 ## COMPLETION
-@pytest.mark.flaky(retries=5, delay=1)
+# @pytest.mark.flaky(retries=5, delay=1)
 @pytest.mark.asyncio
 async def test_async_chat_azure():
     try:
@@ -427,11 +427,11 @@ async def test_async_chat_azure():
         async for chunk in response:
             print(f"async azure router chunk: {chunk}")
             continue
-        await asyncio.sleep(1)
+        await asyncio.sleep(2)
         print(f"customHandler.states: {customHandler_streaming_azure_router.states}")
         assert len(customHandler_streaming_azure_router.errors) == 0
         assert (
-            len(customHandler_streaming_azure_router.states) >= 4
+            len(customHandler_streaming_azure_router.states) >= 3
         )  # pre, post, stream (multiple times), success
         # failure
         model_list = [
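
The diff above continues into further test changes; separately, the test_stream_token_counting_with_redaction bullet covers the second half of the title: usage must still be tracked when litellm.turn_off_message_logging=True redacts message content from logs. A hedged sketch of that scenario, using litellm's CustomLogger callback interface (the handler and model names are illustrative, and this is not the commit's actual test):

import asyncio

import litellm
from litellm.integrations.custom_logger import CustomLogger


class UsageTracker(CustomLogger):
    """Records the usage block from successful calls."""

    def __init__(self):
        self.usage = None

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # Even with redaction enabled, usage should be populated here.
        self.usage = getattr(response_obj, "usage", None)


async def main():
    tracker = UsageTracker()
    litellm.callbacks = [tracker]
    litellm.turn_off_message_logging = True  # redact message content in logs

    response = await litellm.acompletion(
        model="gpt-4o-mini",  # illustrative model
        messages=[{"role": "user", "content": "Hi"}],
        stream=True,
        stream_options={"include_usage": True},
    )
    async for _ in response:
        pass
    await asyncio.sleep(2)  # give async callbacks time to flush
    assert tracker.usage is not None


asyncio.run(main())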