diff --git a/litellm/main.py b/litellm/main.py
index 262b5f8ff..8f00cbc88 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1600,7 +1600,7 @@ def completion(
             response = CustomStreamWrapper(
                 response,
                 model,
-                custom_llm_provider="anthropic",
+                custom_llm_provider="cloudflare",
                 logging_obj=logging,
             )
diff --git a/litellm/utils.py b/litellm/utils.py
index a1f074931..0b4477eb6 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6903,6 +6903,36 @@ class CustomStreamWrapper:
             traceback.print_exc()
             return ""
 
+    def handle_cloudlfare_stream(self, chunk):
+        try:
+            print_verbose(f"\nRaw OpenAI Chunk\n{chunk}\n")
+            chunk = chunk.decode("utf-8")
+            str_line = chunk
+            text = ""
+            is_finished = False
+            finish_reason = None
+
+            if "[DONE]" in chunk:
+                return {"text": text, "is_finished": True, "finish_reason": "stop"}
+            elif str_line.startswith("data:"):
+                data_json = json.loads(str_line[5:])
+                print_verbose(f"delta content: {data_json}")
+                text = data_json["response"]
+                return {
+                    "text": text,
+                    "is_finished": is_finished,
+                    "finish_reason": finish_reason,
+                }
+            else:
+                return {
+                    "text": text,
+                    "is_finished": is_finished,
+                    "finish_reason": finish_reason,
+                }
+
+        except Exception as e:
+            raise e
+
     def handle_ollama_stream(self, chunk):
         try:
             if isinstance(chunk, dict):
@@ -7192,6 +7222,14 @@ class CustomStreamWrapper:
                     model_response.choices[0].finish_reason = response_obj[
                         "finish_reason"
                     ]
+            elif self.custom_llm_provider == "cloudflare":
+                response_obj = self.handle_cloudlfare_stream(chunk)
+                completion_obj["content"] = response_obj["text"]
+                print_verbose(f"completion obj content: {completion_obj['content']}")
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj[
+                        "finish_reason"
+                    ]
             elif self.custom_llm_provider == "text-completion-openai":
                 response_obj = self.handle_openai_text_completion_chunk(chunk)
                 completion_obj["content"] = response_obj["text"]
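
For context, the new handler appears to expect Cloudflare Workers AI's server-sent-events framing, where each chunk is a `data: {...}` line carrying the partial completion under a `response` key and the stream ends with a `data: [DONE]` sentinel. The sketch below is not part of the diff: it is a standalone mirror of `handle_cloudlfare_stream`'s parsing logic with illustrative sample chunks (the exact wire format is an assumption), so the expected inputs and outputs are easy to see.

```python
import json


def parse_cloudflare_chunk(chunk: bytes) -> dict:
    """Illustrative mirror of handle_cloudlfare_stream's parsing (not the library API)."""
    str_line = chunk.decode("utf-8")
    text, is_finished, finish_reason = "", False, None

    if "[DONE]" in str_line:
        # End-of-stream sentinel: report a "stop" finish reason with no text.
        return {"text": text, "is_finished": True, "finish_reason": "stop"}
    if str_line.startswith("data:"):
        # Each data line carries a JSON object; the partial completion is
        # read from the "response" field (assumed Workers AI streaming format).
        data_json = json.loads(str_line[5:])
        text = data_json["response"]
    return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}


# Hypothetical chunks in the assumed SSE framing:
for raw in (
    b'data: {"response": "Hello"}',
    b'data: {"response": " world"}',
    b"data: [DONE]",
):
    print(parse_cloudflare_chunk(raw))
# First two chunks yield text "Hello" / " world" with is_finished=False;
# the final chunk yields is_finished=True and finish_reason="stop".
```

In the diff itself, this per-chunk dict is what the new `elif self.custom_llm_provider == "cloudflare":` branch consumes: the `text` value becomes the delta content and, once `is_finished` is set, the `finish_reason` is copied onto the streamed `model_response`.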