fix(main.py): fix async text completion streaming + add new tests

2023-12-29 11:33:28 +05:30 · 2023-12-29 11:33:28 +05:30 · 6f2734100f
commit 6f2734100f
parent 2b8e2bd937
2 changed files with 41 additions and 14 deletions
--- a/litellm/main.py
+++ b/litellm/main.py
@ -2472,22 +2472,22 @@ async def atext_completion(*args, **kwargs):
            or custom_llm_provider == "ollama"
            or custom_llm_provider == "vertex_ai"
        ):  # currently implemented aiohttp calls for just azure and openai, soon all.
-            if kwargs.get("stream", False):
-                response = text_completion(*args, **kwargs)
-            else:
-                # Await normally
-                response = await loop.run_in_executor(None, func_with_context)
-                if asyncio.iscoroutine(response):
-                    response = await response
+            # Await normally
+            response = await loop.run_in_executor(None, func_with_context)
+            if asyncio.iscoroutine(response):
+                response = await response
        else:
            # Call the synchronous function using run_in_executor
            response = await loop.run_in_executor(None, func_with_context)
-        if kwargs.get("stream", False):  # return an async generator
-            return _async_streaming(
-                response=response,
+        if kwargs.get("stream", False) == True:  # return an async generator
+            return TextCompletionStreamWrapper(
+                completion_stream=_async_streaming(
+                    response=response,
+                    model=model,
+                    custom_llm_provider=custom_llm_provider,
+                    args=args,
+                ),
                model=model,
-                custom_llm_provider=custom_llm_provider,
-                args=args,
            )
        else:
            return response
@ -2691,11 +2691,11 @@ def text_completion(
        **kwargs,
        **optional_params,
    )
+    if kwargs.get("acompletion", False) == True:
+        return response
    if stream == True or kwargs.get("stream", False) == True:
        response = TextCompletionStreamWrapper(completion_stream=response, model=model)
        return response
-    if kwargs.get("acompletion", False) == True:
-        return response
    transformed_logprobs = None
    # only supported for TGI models
    try:
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@ -215,3 +215,30 @@ def test_get_response_non_openai_streaming():


 # test_get_response_non_openai_streaming()
+
+
+@pytest.mark.asyncio  # lets pytest await this coroutine (needs pytest-asyncio)
+async def test_get_response():
+    """Streamed atext_completion must emit exactly one chunk with a finish_reason."""
+    try:
+        response = await litellm.atext_completion(
+            model="gpt-3.5-turbo", prompt="good morning", stream=True, max_tokens=10
+        )
+        print(f"response: {response}")
+
+        num_finish_reason = 0
+        # Tally chunks that carry a finish_reason; the assertion below expects one.
+        async for chunk in response:
+            print(chunk)
+            if chunk["choices"][0].get("finish_reason") is not None:
+                num_finish_reason += 1
+                print("finish_reason", chunk["choices"][0].get("finish_reason"))
+
+        assert (
+            num_finish_reason == 1
+        ), f"expected only one finish reason. Got {num_finish_reason}"
+    except Exception as e:
+        pytest.fail(f"Got exception for gpt-3.5-turbo in streaming: {e}")
+
+
+# asyncio.run(test_get_response())