# litellm-mirror/litellm/tests/test_ollama.py

###### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING ######
# import aiohttp
# import json
# import asyncio
# import requests

# async def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     session = aiohttp.ClientSession()
#     url = f'{api_base}/api/generate'
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }
#     response = ""
#     try:
#         async with session.post(url, json=data) as resp:
#             async for line in resp.content.iter_any():
#                 if line:
#                     try:
#                         json_chunk = line.decode("utf-8")
#                         chunks = json_chunk.split("\n")
#                         for chunk in chunks:
#                             if chunk.strip() != "":
#                                 j = json.loads(chunk)
#                                 if "response" in j:
#                                     print(j["response"])
#                                     yield {
#                                         "role": "assistant",
#                                         "content": j["response"]
#                                     }
#                                     # self.responses.append(j["response"])
#                                     # yield "blank"
#                     except Exception as e:
#                         print(f"Error decoding JSON: {e}")
#     finally:
#         await session.close()
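
# --------------------------------------------------------------------------
# Not part of the original test file: a minimal synchronous sketch of the
# same streaming call, using the `requests` import above instead of aiohttp.
# It assumes the same local Ollama server at http://localhost:11434 and the
# same /api/generate endpoint, and, like the tests above, it can only run
# locally with the Ollama server running.
# --------------------------------------------------------------------------
# def get_ollama_response_stream_sync(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     url = f'{api_base}/api/generate'
#     data = {"model": model, "prompt": prompt}
#     # stream=True keeps the connection open so newline-delimited JSON chunks
#     # can be read as they arrive instead of waiting for the full body
#     with requests.post(url, json=data, stream=True) as resp:
#         for line in resp.iter_lines():
#             if line:
#                 j = json.loads(line.decode("utf-8"))
#                 if "response" in j:
#                     yield {"role": "assistant", "content": j["response"]}

# # Example usage (hypothetical, requires a local Ollama server):
# # print("".join(chunk["content"] for chunk in get_ollama_response_stream_sync()))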
# # async def get_ollama_response_no_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
# #     generator = get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?")
# #     response = ""
# #     async for elem in generator:
# #         print(elem)
# #         response += elem["content"]
# #     return response

# # #generator = get_ollama_response_stream()
# # result = asyncio.run(get_ollama_response_no_stream())
# # print(result)

# # # return this generator to the client for streaming requests
# # async def get_response():
# #     global generator
# #     async for elem in generator:
# #         print(elem)
# # asyncio.run(get_response())
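
# --------------------------------------------------------------------------
# Not part of the original test file: a hedged sketch of how the same local
# Ollama setup could be exercised through litellm's own completion() API.
# The "ollama/llama2" model string, api_base, and stream flag are standard
# litellm parameters, but the exact shape of the streamed chunks is not
# asserted here; this is an illustration, not the repository's test, and it
# also requires a local Ollama server to run.
# --------------------------------------------------------------------------
# import litellm

# def test_ollama_streaming_via_litellm():
#     response = litellm.completion(
#         model="ollama/llama2",
#         messages=[{"role": "user", "content": "Why is the sky blue?"}],
#         api_base="http://localhost:11434",
#         stream=True,
#     )
#     for chunk in response:
#         print(chunk)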