# litellm-mirror/litellm/tests/test_ollama_local.py
##### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING ######
# https://ollama.ai/
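# Prerequisites (assumed): a local Ollama server listening on the default
# http://localhost:11434, with the models referenced below pulled beforehand,
# e.g. `ollama pull llama2` and `ollama pull mistral-openorca`.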
import sys, os
import asyncio
import traceback

from dotenv import load_dotenv

load_dotenv()

sys.path.insert(0, os.path.abspath("../.."))  # Adds the parent directory to the system path

import pytest
import litellm
from litellm import embedding, completion

user_message = "respond in 20 words. who are you?"
messages = [{"content": user_message, "role": "user"}]

async def test_async_ollama_streaming():
    try:
        litellm.set_verbose = True
        response = await litellm.acompletion(
            model="ollama/mistral-openorca",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
            stream=True,
        )
        async for chunk in response:
            print(chunk)
    except Exception as e:
        print(e)


asyncio.run(test_async_ollama_streaming())
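
# The two synchronous tests below exercise non-streaming completion calls.
# The first relies on litellm's default Ollama endpoint; the second passes
# api_base explicitly (http://localhost:11434 is Ollama's default port, so
# both are expected to hit the same local server).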

def test_completion_ollama():
    try:
        response = completion(
            model="ollama/llama2",
            messages=messages,
            max_tokens=200,
            request_timeout=10,
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_ollama()

def test_completion_ollama_with_api_base():
    try:
        response = completion(
            model="ollama/llama2",
            messages=messages,
            api_base="http://localhost:11434",
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_ollama_with_api_base()
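
# The next test registers a custom prompt template: register_prompt_template
# maps each chat role to pre-/post-message strings, so the chat messages below
# should be flattened into a raw "System:/User:/Assistant:"-style prompt before
# being sent to Ollama (an assumption based on the roles dict used here).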

def test_completion_ollama_custom_prompt_template():
    user_message = "what is litellm?"
    litellm.register_prompt_template(
        model="ollama/llama2",
        roles={
            "system": {"pre_message": "System: "},
            "user": {"pre_message": "User: "},
            "assistant": {"pre_message": "Assistant: "},
        },
    )
    messages = [{"content": user_message, "role": "user"}]
    litellm.set_verbose = True
    try:
        response = completion(
            model="ollama/llama2",
            messages=messages,
            stream=True,
        )
        print(response)
        for chunk in response:
            print(chunk)
            # print(chunk['choices'][0]['delta'])
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


# test_completion_ollama_custom_prompt_template()

async def test_completion_ollama_async_stream():
    user_message = "what is the weather"
    messages = [{"content": user_message, "role": "user"}]
    try:
        response = await litellm.acompletion(
            model="ollama/llama2",
            messages=messages,
            api_base="http://localhost:11434",
            stream=True,
        )
        async for chunk in response:
            print(chunk["choices"][0]["delta"])

        print("TEST ASYNC NON Stream")
        response = await litellm.acompletion(
            model="ollama/llama2",
            messages=messages,
            api_base="http://localhost:11434",
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# asyncio.run(test_completion_ollama_async_stream())
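
# The helpers below (prepare_messages_for_chat / ask_question / main) are not
# collected by pytest since they lack the test_ prefix; they are meant to be
# driven manually via the commented-out asyncio.run(main()) call at the bottom
# of the file.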

def prepare_messages_for_chat(text: str) -> list:
    messages = [
        {"role": "user", "content": text},
    ]
    return messages

async def ask_question():
    params = {
        "messages": prepare_messages_for_chat(
            "What is litellm? tell me 10 things about it who is sihaan.write an essay"
        ),
        "api_base": "http://localhost:11434",
        "model": "ollama/llama2",
        "stream": True,
    }
    response = await litellm.acompletion(**params)
    return response

async def main():
    response = await ask_question()
    async for chunk in response:
        print(chunk)

    print("test async completion without streaming")
    response = await litellm.acompletion(
        model="ollama/llama2",
        messages=prepare_messages_for_chat("What is litellm? respond in 2 words"),
    )
    print("response", response)

def test_completion_expect_error():
    # this tests if we can exception map correctly for ollama
    print("making ollama request")
    # litellm.set_verbose=True
    user_message = "what is litellm?"
    messages = [{"content": user_message, "role": "user"}]
    try:
        response = completion(
            model="ollama/invalid",
            messages=messages,
            stream=True,
        )
        print(response)
        for chunk in response:
            print(chunk)
            # print(chunk['choices'][0]['delta'])
    except Exception as e:
        # an exception is expected here - the model name is invalid
        print(e)
        return
    pytest.fail("Expected an error for the invalid model, but the call succeeded")


# test_completion_expect_error()

# if __name__ == "__main__":
#     import asyncio
#     asyncio.run(main())