From 48cad985adf0061bdb515494e9c45307d76d0f8b Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Sat, 30 Sep 2023 10:22:04 -0700
Subject: [PATCH] add fake streaming for petals

---
 litellm/main.py  |  7 ++++---
 litellm/utils.py | 12 ++++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/litellm/main.py b/litellm/main.py
index 49a8d82d93..d74c830921 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1099,10 +1099,11 @@ def completion(
             encoding=encoding,
             logging_obj=logging
         )
-        if inspect.isgenerator(model_response) or (stream == True):
-            # don't try to access stream object,
+        if stream==True: ## [BETA]
+            # Fake streaming for petals
+            resp_string = model_response["choices"][0]["message"]["content"]
             response = CustomStreamWrapper(
-                model_response, model, custom_llm_provider="petals", logging_obj=logging
+                resp_string, model, custom_llm_provider="petals", logging_obj=logging
             )
             return response
         response = model_response

diff --git a/litellm/utils.py b/litellm/utils.py
index f9a286bf63..49bece66a9 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3158,6 +3158,18 @@ class CustomStreamWrapper:
                 completion_obj["content"] = new_chunk
                 self.completion_stream = self.completion_stream[chunk_size:]
                 time.sleep(0.05)
+            elif self.custom_llm_provider == "petals":
+                if len(self.completion_stream)==0:
+                    if self.sent_last_chunk:
+                        raise StopIteration
+                    else:
+                        model_response.choices[0].finish_reason = "stop"
+                        self.sent_last_chunk = True
+                chunk_size = 30
+                new_chunk = self.completion_stream[:chunk_size]
+                completion_obj["content"] = new_chunk
+                self.completion_stream = self.completion_stream[chunk_size:]
+                time.sleep(0.05)
             elif self.custom_llm_provider == "palm": # fake streaming
                 if len(self.completion_stream)==0:
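
Note: below is a minimal standalone sketch of the fake-streaming technique this
patch adds, not litellm's actual CustomStreamWrapper; the generator name, chunk
dict shape, and default arguments are illustrative assumptions. Petals returns
the full completion at once, so the wrapper fakes a stream by slicing the
finished string into fixed-size 30-character chunks, sleeping ~50ms between
yields, and signaling the end with finish_reason="stop" once the buffer is
empty, just as the petals branch in utils.py does.

    import time
    from typing import Iterator

    def fake_stream(full_text: str, chunk_size: int = 30,
                    delay: float = 0.05) -> Iterator[dict]:
        # Slice the pre-computed completion into fixed-size pieces,
        # mirroring the petals branch above: 30 chars per chunk and a
        # short pause to imitate token-by-token arrival.
        remaining = full_text
        while remaining:
            chunk, remaining = remaining[:chunk_size], remaining[chunk_size:]
            yield {"choices": [{"delta": {"content": chunk},
                                "finish_reason": None}]}
            time.sleep(delay)
        # The diff sets finish_reason="stop" only once the buffer is empty,
        # so the final chunk a consumer sees carries no content.
        yield {"choices": [{"delta": {"content": ""},
                            "finish_reason": "stop"}]}

    # Hypothetical usage; resp_string would be the completed petals output.
    for part in fake_stream("Hello from Petals, streamed in fake chunks."):
        print(part["choices"][0]["delta"]["content"], end="", flush=True)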