(feat) add streaming for text_completion

2025-04-25 18:54:30 +00:00 · 2023-11-08 11:58:07 -08:00 · 2023-11-08 11:58:07 -08:00 · 2a751c277f
commit 2a751c277f
parent a404b0fc3b
2 changed files with 41 additions and 0 deletions
--- a/litellm/main.py
+++ b/litellm/main.py
@ -59,6 +59,7 @@ encoding = tiktoken.get_encoding("cl100k_base")
 from litellm.utils import (
    get_secret,
    CustomStreamWrapper,
+    TextCompletionStreamWrapper,
    ModelResponse,
    TextCompletionResponse,
    TextChoices,
@ -2031,6 +2032,9 @@ def text_completion(
            **kwargs,
            **optional_params,
        )
+        if stream == True or kwargs.get("stream", False) == True:
+            response = TextCompletionStreamWrapper(completion_stream=response, model=model)
+            return response

        transformed_logprobs = None
        # only supported for TGI models
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -4156,6 +4156,43 @@ class CustomStreamWrapper:
        except StopIteration:
            raise StopAsyncIteration

+class TextCompletionStreamWrapper:
+    def __init__(self, completion_stream, model):
+        self.completion_stream = completion_stream
+        self.model = model
+
+    def __iter__(self):
+        return self
+
+    def __aiter__(self):
+        return self
+
+    def __next__(self):
+        # model_response = ModelResponse(stream=True, model=self.model)
+        response = TextCompletionResponse()
+        try:
+            while True: # loop until a non-empty string is found
+                # return this for all models
+                chunk = next(self.completion_stream)
+                response["id"] = chunk.get("id", None)
+                response["object"] = "text_completion"
+                response["created"] = response.get("created", None)
+                response["model"] = response.get("model", None)
+                text_choices = TextChoices()
+                text_choices["text"] = chunk["choices"][0]["delta"]["content"]
+                text_choices["index"] = response["choices"][0]["index"]
+                text_choices["finish_reason"] = response["choices"][0]["finish_reason"]
+                response["choices"] = [text_choices]
+                return response
+        except StopIteration:
+            raise StopIteration
+        except Exception as e: 
+            print(f"got exception {e}")
+    async def __anext__(self):
+        try:
+            return next(self)
+        except StopIteration:
+            raise StopAsyncIteration

 def mock_completion_streaming_obj(model_response, mock_response, model):
    for i in range(0, len(mock_response), 3):