From f27a93a4dcb29a585af65cdd40dcfafdd5aaef03 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 25 Dec 2023 23:38:47 +0530
Subject: [PATCH] (feat) ollama_chat - streaming

---
 litellm/utils.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/litellm/utils.py b/litellm/utils.py
index 33673d4e4..ee0de81c4 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6781,6 +6781,41 @@ class CustomStreamWrapper:
         except Exception as e:
             raise e
 
+    def handle_ollama_chat_stream(self, chunk):
+        # for ollama_chat/ provider
+        try:
+            if isinstance(chunk, dict):
+                json_chunk = chunk
+            else:
+                json_chunk = json.loads(chunk)
+            if "error" in json_chunk:
+                raise Exception(f"Ollama Error - {json_chunk}")
+
+            text = ""
+            is_finished = False
+            finish_reason = None
+            if json_chunk["done"] == True:
+                text = ""
+                is_finished = True
+                finish_reason = "stop"
+                return {
+                    "text": text,
+                    "is_finished": is_finished,
+                    "finish_reason": finish_reason,
+                }
+            elif "message" in json_chunk:
+                print_verbose(f"delta content: {json_chunk}")
+                text = json_chunk["message"]["content"]
+                return {
+                    "text": text,
+                    "is_finished": is_finished,
+                    "finish_reason": finish_reason,
+                }
+            else:
+                raise Exception(f"Ollama Error - {json_chunk}")
+        except Exception as e:
+            raise e
+
     def handle_bedrock_stream(self, chunk):
         if hasattr(chunk, "get"):
             chunk = chunk.get("chunk")
@@ -6993,6 +7028,14 @@ class CustomStreamWrapper:
                     model_response.choices[0].finish_reason = response_obj[
                         "finish_reason"
                     ]
+            elif self.custom_llm_provider == "ollama_chat":
+                response_obj = self.handle_ollama_chat_stream(chunk)
+                completion_obj["content"] = response_obj["text"]
+                print_verbose(f"completion obj content: {completion_obj['content']}")
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj[
+                        "finish_reason"
+                    ]
             elif self.custom_llm_provider == "text-completion-openai":
                 response_obj = self.handle_openai_text_completion_chunk(chunk)
                 completion_obj["content"] = response_obj["text"]
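
Usage note (not part of the patch): a minimal sketch of how the new streaming path could be exercised, assuming a local Ollama server is running and a model such as "llama2" has been pulled; the model name and prompt are illustrative only.

import litellm

response = litellm.completion(
    model="ollama_chat/llama2",  # "ollama_chat/" prefix routes to the ollama_chat provider
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,  # each raw chunk is parsed by handle_ollama_chat_stream
)

for chunk in response:
    # delta text originates from json_chunk["message"]["content"] in the handler;
    # the final chunk carries finish_reason "stop" and no content
    print(chunk.choices[0].delta.content or "", end="")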