Mirror of https://github.com/BerriAI/litellm.git
refactor(openai.py): support aiohttp streaming
parent bba62b56d3
commit c053782d96

5 changed files with 108 additions and 42 deletions
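The openai.py changes themselves are not part of the excerpt below; only the test updates are visible. As a rough sketch of the technique the commit title refers to, streaming over aiohttp generally means reading the server-sent-event lines from the response body and yielding each parsed chunk from an async generator. The snippet below is illustrative only, not litellm's actual openai.py code; the endpoint URL, headers, payload shape, and the stream_chat_completion name are assumptions.

# Illustrative sketch only -- not litellm's openai.py implementation.
# Reads an OpenAI-style SSE stream over aiohttp and yields parsed chunks.
import json
import aiohttp

async def stream_chat_completion(api_key, messages, model="gpt-3.5-turbo"):
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {"model": model, "messages": messages, "stream": True}
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "https://api.openai.com/v1/chat/completions",
            json=payload,
            headers=headers,
        ) as resp:
            # Each event arrives as a "data: {...}" line; "[DONE]" ends the stream.
            async for raw_line in resp.content:
                line = raw_line.decode("utf-8").strip()
                if not line.startswith("data: "):
                    continue
                data = line[len("data: "):]
                if data == "[DONE]":
                    break
                yield json.loads(data)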
@@ -37,7 +37,7 @@ def test_async_response():
    response = asyncio.run(test_get_response())
    # print(response)
test_async_response()
# test_async_response()

def test_get_response_streaming():
    import asyncio
@@ -45,8 +45,6 @@ def test_get_response_streaming():
        user_message = "Hello, how are you?"
        messages = [{"content": user_message, "role": "user"}]
        try:
            import litellm
            litellm.set_verbose = True
            response = await acompletion(model="gpt-3.5-turbo", messages=messages, stream=True)
            print(type(response))

@@ -56,11 +54,11 @@ def test_get_response_streaming():
            print(is_async_generator)

            output = ""
            i = 0
            async for chunk in response:
                token = chunk["choices"][0]["delta"].get("content", "")
                output += token
                print(output)

            print(f"output: {output}")
            assert output is not None, "output cannot be None."
            assert isinstance(output, str), "output needs to be of type str"
            assert len(output) > 0, "Length of output needs to be greater than 0."

@@ -71,3 +69,35 @@ def test_get_response_streaming():
    asyncio.run(test_async_call())


# test_get_response_streaming()

def test_get_response_non_openai_streaming():
    import asyncio
    async def test_async_call():
        user_message = "Hello, how are you?"
        messages = [{"content": user_message, "role": "user"}]
        try:
            response = await acompletion(model="command-nightly", messages=messages, stream=True)
            print(type(response))

            import inspect

            is_async_generator = inspect.isasyncgen(response)
            print(is_async_generator)

            output = ""
            i = 0
            async for chunk in response:
                token = chunk["choices"][0]["delta"].get("content", "")
                output += token
            print(f"output: {output}")
            assert output is not None, "output cannot be None."
            assert isinstance(output, str), "output needs to be of type str"
            assert len(output) > 0, "Length of output needs to be greater than 0."

        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")
        return response
    asyncio.run(test_async_call())

test_get_response_non_openai_streaming()
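Both tests repeat the same consumption loop over the streamed chunks. As a usage sketch, that pattern can be factored into a small helper like the one below; the collect_stream name and structure are illustrative, not part of this commit.

# Illustrative helper (not part of the commit): gathers the streamed text
# from the async generator returned by acompletion(..., stream=True).
import asyncio
from litellm import acompletion

async def collect_stream(model, messages):
    response = await acompletion(model=model, messages=messages, stream=True)
    output = ""
    async for chunk in response:
        # Guard against a missing or None "content" field in a delta.
        output += chunk["choices"][0]["delta"].get("content", "") or ""
    return output

# Example, mirroring the tests above:
# text = asyncio.run(collect_stream("gpt-3.5-turbo", [{"content": "Hello, how are you?", "role": "user"}]))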