ollama upgrades, fix streaming, add non-streaming resp

ishaan-jaff 2023-09-09 14:07:11 -07:00
parent 6cb03d7c63
commit 56bd8c1c52
5 changed files with 135 additions and 86 deletions


@@ -2217,51 +2217,6 @@ def read_config_args(config_path):
print("An error occurred while reading config:", str(e))
raise e
########## ollama implementation ############################
import aiohttp  # may already be imported at the top of the module
import json     # may already be imported at the top of the module

async def get_ollama_response_stream(
    api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"
):
    # Stream completions from a local ollama server and re-shape each chunk
    # into the OpenAI streaming format: {"choices": [{"delta": {...}}]}.
    session = aiohttp.ClientSession()
    url = f"{api_base}/api/generate"
    data = {
        "model": model,
        "prompt": prompt,
    }
    try:
        async with session.post(url, json=data) as resp:
            async for line in resp.content.iter_any():
                if line:
                    try:
                        # A raw chunk may carry several newline-delimited JSON objects.
                        json_chunk = line.decode("utf-8")
                        chunks = json_chunk.split("\n")
                        for chunk in chunks:
                            if chunk.strip() != "":
                                j = json.loads(chunk)
                                if "response" in j:
                                    completion_obj = {
                                        "role": "assistant",
                                        "content": j["response"],
                                    }
                                    yield {"choices": [{"delta": completion_obj}]}
                    except Exception as e:
                        print(f"Error decoding JSON: {e}")
    finally:
        await session.close()
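
A minimal usage sketch, assuming a local ollama server is listening on http://localhost:11434 with the llama2 model pulled (the _print_stream helper is hypothetical, not part of this diff):

import asyncio

async def _print_stream():
    # Drain the async generator and print tokens as they arrive.
    async for chunk in get_ollama_response_stream(prompt="Why is the sky blue?"):
        print(chunk["choices"][0]["delta"]["content"], end="", flush=True)

asyncio.run(_print_stream())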
async def stream_to_string(generator):
    # Collect a streamed response into a single string. Each chunk has the
    # OpenAI delta shape produced above, so the text lives under
    # chunk["choices"][0]["delta"]["content"], not chunk["content"].
    response = ""
    async for chunk in generator:
        response += chunk["choices"][0]["delta"]["content"]
    return response
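
Similarly, a sketch of collecting the full non-streaming response via stream_to_string (same server assumptions; _collect_response is hypothetical):

import asyncio

async def _collect_response():
    # Convert the stream into a single response string.
    gen = get_ollama_response_stream(model="llama2", prompt="Why is the sky blue?")
    return await stream_to_string(gen)

print(asyncio.run(_collect_response()))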
########## experimental completion variants ############################
def get_model_split_test(models, completion_call_id):