From 48cad985adf0061bdb515494e9c45307d76d0f8b Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Sat, 30 Sep 2023 10:22:04 -0700
Subject: [PATCH] add fake streaming for petals

---
 litellm/main.py  |  7 ++++---
 litellm/utils.py | 12 ++++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/litellm/main.py b/litellm/main.py
index 49a8d82d93..d74c830921 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1099,10 +1099,11 @@ def completion(
             encoding=encoding,
             logging_obj=logging
         )
-        if inspect.isgenerator(model_response) or (stream == True):
-            # don't try to access stream object,
+        if stream==True: ## [BETA]
+            # Fake streaming for petals
+            resp_string = model_response["choices"][0]["message"]["content"]
             response = CustomStreamWrapper(
-                model_response, model, custom_llm_provider="petals", logging_obj=logging
+                resp_string, model, custom_llm_provider="petals", logging_obj=logging
             )
             return response
         response = model_response

diff --git a/litellm/utils.py b/litellm/utils.py
index f9a286bf63..49bece66a9 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3158,6 +3158,18 @@ class CustomStreamWrapper:
                 completion_obj["content"] = new_chunk
                 self.completion_stream = self.completion_stream[chunk_size:]
                 time.sleep(0.05)
+            elif self.custom_llm_provider == "petals":
+                if len(self.completion_stream)==0:
+                    if self.sent_last_chunk:
+                        raise StopIteration
+                    else:
+                        model_response.choices[0].finish_reason = "stop"
+                        self.sent_last_chunk = True
+                chunk_size = 30
+                new_chunk = self.completion_stream[:chunk_size]
+                completion_obj["content"] = new_chunk
+                self.completion_stream = self.completion_stream[chunk_size:]
+                time.sleep(0.05)
             elif self.custom_llm_provider == "palm": # fake streaming
                 if len(self.completion_stream)==0:
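
Note: below is a minimal standalone sketch of the fake-streaming technique this
patch adds, not litellm's actual CustomStreamWrapper; the generator name, chunk
dict shape, and default arguments are illustrative assumptions. Petals returns
the full completion at once, so the wrapper fakes a stream by slicing the
finished string into fixed-size 30-character chunks, sleeping ~50ms between
yields, and signaling the end with finish_reason="stop" once the buffer is
empty, just as the petals branch in utils.py does.

    import time
    from typing import Iterator

    def fake_stream(full_text: str, chunk_size: int = 30,
                    delay: float = 0.05) -> Iterator[dict]:
        # Slice the pre-computed completion into fixed-size pieces,
        # mirroring the petals branch above: 30 chars per chunk and a
        # short pause to imitate token-by-token arrival.
        remaining = full_text
        while remaining:
            chunk, remaining = remaining[:chunk_size], remaining[chunk_size:]
            yield {"choices": [{"delta": {"content": chunk},
                                "finish_reason": None}]}
            time.sleep(delay)
        # The diff sets finish_reason="stop" only once the buffer is empty,
        # so the final chunk a consumer sees carries no content.
        yield {"choices": [{"delta": {"content": ""},
                            "finish_reason": "stop"}]}

    # Hypothetical usage; resp_string would be the completed petals output.
    for part in fake_stream("Hello from Petals, streamed in fake chunks."):
        print(part["choices"][0]["delta"]["content"], end="", flush=True)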