(Fixes) OpenAI Streaming Token Counting + Fixes usage tracking when litellm.turn_off_message_logging=True (#8156)

* working streaming usage tracking

* fix test_async_chat_openai_stream_options

* fix await asyncio.sleep(1)

* test_async_chat_azure

* fix s3 logging

* fix get_stream_options

* fix get_stream_options

* fix streaming handler

* test_stream_token_counting_with_redaction

* fix codeql concern
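
The thread running through these commits: when a completion is streamed, token usage has to be rebuilt from the chunks, and it must still reach the async success callbacks when litellm.turn_off_message_logging=True redacts message content. A minimal sketch of the behavior under test (the UsageRecorder class, model name, and message are illustrative and not part of this commit; the actual tests use litellm's own handlers):

import asyncio

import litellm
from litellm.integrations.custom_logger import CustomLogger


class UsageRecorder(CustomLogger):
    # Hypothetical helper for illustration; it records the usage object that
    # litellm passes to async success callbacks.
    def __init__(self):
        super().__init__()
        self.usage = None

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # For streams, litellm assembles the final response (including usage)
        # from the received chunks before invoking this callback.
        self.usage = getattr(response_obj, "usage", None)


async def main():
    recorder = UsageRecorder()
    litellm.callbacks = [recorder]
    # The setting from the commit title: message content is redacted from
    # logs, but token usage must still be tracked.
    litellm.turn_off_message_logging = True

    response = await litellm.acompletion(
        model="gpt-3.5-turbo",  # any streaming-capable model works here
        messages=[{"role": "user", "content": "Hi"}],
        stream=True,
        stream_options={"include_usage": True},
    )
    async for _chunk in response:
        continue
    # Async success callbacks are scheduled on the event loop; yield to it
    # before asserting, which is also why the diff below adds
    # await asyncio.sleep(1) after every streaming loop.
    await asyncio.sleep(1)
    assert recorder.usage is not None, "usage should survive redaction"


if __name__ == "__main__":
    asyncio.run(main())

The trailing await asyncio.sleep(1) mirrors the test changes below: a test that asserts immediately after draining the stream can race the scheduled success callback.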
Ishaan Jaff · 2025-01-31 15:06:37 -08:00 · committed by GitHub
parent 9f0f2b3f01 · commit 2cf0daa31c
8 changed files with 268 additions and 94 deletions


@@ -418,6 +418,8 @@ async def test_async_chat_openai_stream():
         )
         async for chunk in response:
             continue
+        await asyncio.sleep(1)
         ## test failure callback
         try:
             response = await litellm.acompletion(
@@ -428,6 +430,7 @@ async def test_async_chat_openai_stream():
             )
             async for chunk in response:
                 continue
+            await asyncio.sleep(1)
         except Exception:
             pass
         time.sleep(1)
@@ -499,6 +502,8 @@ async def test_async_chat_azure_stream():
         )
         async for chunk in response:
             continue
+        await asyncio.sleep(1)
         # test failure callback
         try:
             response = await litellm.acompletion(
@@ -509,6 +514,7 @@ async def test_async_chat_azure_stream():
             )
             async for chunk in response:
                 continue
+            await asyncio.sleep(1)
         except Exception:
             pass
         await asyncio.sleep(1)
@@ -540,6 +546,8 @@ async def test_async_chat_openai_stream_options():
         async for chunk in response:
             continue
+        await asyncio.sleep(1)
         print("mock client args list=", mock_client.await_args_list)
         mock_client.assert_awaited_once()
     except Exception as e:
@@ -607,6 +615,8 @@ async def test_async_chat_bedrock_stream():
         async for chunk in response:
             print(f"chunk: {chunk}")
             continue
+        await asyncio.sleep(1)
         ## test failure callback
         try:
             response = await litellm.acompletion(
@@ -617,6 +627,8 @@ async def test_async_chat_bedrock_stream():
             )
             async for chunk in response:
                 continue
+            await asyncio.sleep(1)
         except Exception:
             pass
         await asyncio.sleep(1)
@@ -770,6 +782,8 @@ async def test_async_text_completion_bedrock():
         async for chunk in response:
             print(f"chunk: {chunk}")
             continue
+        await asyncio.sleep(1)
         ## test failure callback
         try:
             response = await litellm.atext_completion(
@@ -780,6 +794,8 @@ async def test_async_text_completion_bedrock():
             )
             async for chunk in response:
                 continue
+            await asyncio.sleep(1)
         except Exception:
             pass
         time.sleep(1)
@@ -809,6 +825,8 @@ async def test_async_text_completion_openai_stream():
         async for chunk in response:
             print(f"chunk: {chunk}")
             continue
+        await asyncio.sleep(1)
         ## test failure callback
         try:
             response = await litellm.atext_completion(
@@ -819,6 +837,8 @@ async def test_async_text_completion_openai_stream():
             )
             async for chunk in response:
                 continue
+            await asyncio.sleep(1)
         except Exception:
             pass
         time.sleep(1)