# litellm-mirror/litellm/tests/test_ollama_local.py
##### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING ######
# https://ollama.ai/
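# Prerequisites (assumed): a local Ollama server listening on the default
# http://localhost:11434, with the models referenced below pulled beforehand,
# e.g. `ollama pull llama2` and `ollama pull mistral-openorca`.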
import sys, os
import asyncio
import traceback

from dotenv import load_dotenv

load_dotenv()

sys.path.insert(0, os.path.abspath("../.."))  # Adds the parent directory to the system path

import pytest
import litellm
from litellm import embedding, completion

user_message = "respond in 20 words. who are you?"
messages = [{"content": user_message, "role": "user"}]

async def test_async_ollama_streaming():
    try:
        litellm.set_verbose = True
        response = await litellm.acompletion(
            model="ollama/mistral-openorca",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
            stream=True,
        )
        async for chunk in response:
            print(chunk)
    except Exception as e:
        print(e)


asyncio.run(test_async_ollama_streaming())
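
# The two synchronous tests below exercise non-streaming completion calls.
# The first relies on litellm's default Ollama endpoint; the second passes
# api_base explicitly (http://localhost:11434 is Ollama's default port, so
# both are expected to hit the same local server).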

def test_completion_ollama():
    try:
        response = completion(
            model="ollama/llama2",
            messages=messages,
            max_tokens=200,
            request_timeout=10,
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_ollama()

def test_completion_ollama_with_api_base():
    try:
        response = completion(
            model="ollama/llama2",
            messages=messages,
            api_base="http://localhost:11434",
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_ollama_with_api_base()
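
# The next test registers a custom prompt template: register_prompt_template
# maps each chat role to pre-/post-message strings, so the chat messages below
# should be flattened into a raw "System:/User:/Assistant:"-style prompt before
# being sent to Ollama (an assumption based on the roles dict used here).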

def test_completion_ollama_custom_prompt_template():
    user_message = "what is litellm?"
    litellm.register_prompt_template(
        model="ollama/llama2",
        roles={
            "system": {"pre_message": "System: "},
            "user": {"pre_message": "User: "},
            "assistant": {"pre_message": "Assistant: "},
        },
    )
    messages = [{"content": user_message, "role": "user"}]
    litellm.set_verbose = True
    try:
        response = completion(
            model="ollama/llama2",
            messages=messages,
            stream=True,
        )
        print(response)
        for chunk in response:
            print(chunk)
            # print(chunk['choices'][0]['delta'])
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


# test_completion_ollama_custom_prompt_template()

async def test_completion_ollama_async_stream():
    user_message = "what is the weather"
    messages = [{"content": user_message, "role": "user"}]
    try:
        response = await litellm.acompletion(
            model="ollama/llama2",
            messages=messages,
            api_base="http://localhost:11434",
            stream=True,
        )
        async for chunk in response:
            print(chunk["choices"][0]["delta"])

        print("TEST ASYNC NON Stream")
        response = await litellm.acompletion(
            model="ollama/llama2",
            messages=messages,
            api_base="http://localhost:11434",
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# asyncio.run(test_completion_ollama_async_stream())
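
# The helpers below (prepare_messages_for_chat / ask_question / main) are not
# collected by pytest since they lack the test_ prefix; they are meant to be
# driven manually via the commented-out asyncio.run(main()) call at the bottom
# of the file.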

def prepare_messages_for_chat(text: str) -> list:
    messages = [
        {"role": "user", "content": text},
    ]
    return messages

async def ask_question():
    params = {
        "messages": prepare_messages_for_chat(
            "What is litellm? tell me 10 things about it who is sihaan.write an essay"
        ),
        "api_base": "http://localhost:11434",
        "model": "ollama/llama2",
        "stream": True,
    }
    response = await litellm.acompletion(**params)
    return response

async def main():
    response = await ask_question()
    async for chunk in response:
        print(chunk)

    print("test async completion without streaming")
    response = await litellm.acompletion(
        model="ollama/llama2",
        messages=prepare_messages_for_chat("What is litellm? respond in 2 words"),
    )
    print("response", response)

def test_completion_expect_error():
    # this tests if we can exception map correctly for ollama
    print("making ollama request")
    # litellm.set_verbose=True
    user_message = "what is litellm?"
    messages = [{"content": user_message, "role": "user"}]
    try:
        response = completion(
            model="ollama/invalid",
            messages=messages,
            stream=True,
        )
        print(response)
        for chunk in response:
            print(chunk)
            # print(chunk['choices'][0]['delta'])
    except Exception as e:
        # an exception is expected here - the model name is invalid
        print(e)
        return
    pytest.fail("Expected an error for the invalid model, but the call succeeded")


# test_completion_expect_error()

# if __name__ == "__main__":
#     import asyncio
#     asyncio.run(main())