##### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING ######
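# Before running anything below, it can help to confirm that the local Ollama server is
# reachable. A minimal sketch (not part of the original tests), assuming a stock local
# install that exposes the /api/tags endpoint:
#
# import requests
#
# def ollama_is_running(api_base="http://localhost:11434"):
#     # Returns True if the local Ollama server answers, False otherwise.
#     try:
#         return requests.get(f"{api_base}/api/tags", timeout=2).status_code == 200
#     except requests.exceptions.RequestException:
#         return False
#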
# import aiohttp
# import json
# import asyncio
# import requests
#
# async def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # Stream newline-delimited JSON chunks from the local Ollama /api/generate endpoint.
#     session = aiohttp.ClientSession()
#     url = f"{api_base}/api/generate"
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }
#
#     response = ""
#
#     try:
#         async with session.post(url, json=data) as resp:
#             async for line in resp.content.iter_any():
#                 if line:
#                     try:
#                         json_chunk = line.decode("utf-8")
#                         chunks = json_chunk.split("\n")
#                         for chunk in chunks:
#                             if chunk.strip() != "":
#                                 j = json.loads(chunk)
#                                 if "response" in j:
#                                     print(j["response"])
#                                     yield {
#                                         "role": "assistant",
#                                         "content": j["response"],
#                                     }
#                                     # self.responses.append(j["response"])
#                                     # yield "blank"
#                     except Exception as e:
#                         print(f"Error decoding JSON: {e}")
#     finally:
#         await session.close()
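# A slightly more idiomatic variant of the generator above (a sketch, not part of the
# original tests) lets `async with` manage the ClientSession instead of the explicit
# try/finally; behavior is otherwise the same:
#
# async def get_ollama_response_stream_ctx(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     async with aiohttp.ClientSession() as session:
#         async with session.post(f"{api_base}/api/generate", json={"model": model, "prompt": prompt}) as resp:
#             async for line in resp.content.iter_any():
#                 if not line:
#                     continue
#                 for chunk in line.decode("utf-8").split("\n"):
#                     if chunk.strip():
#                         j = json.loads(chunk)
#                         if "response" in j:
#                             yield {"role": "assistant", "content": j["response"]}
#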
#
# async def get_ollama_response_no_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # Drain the streaming generator and concatenate the chunks into a single string.
#     generator = get_ollama_response_stream(api_base=api_base, model=model, prompt=prompt)
#     response = ""
#     async for elem in generator:
#         print(elem)
#         response += elem["content"]
#     return response
#
# # generator = get_ollama_response_stream()
#
# result = asyncio.run(get_ollama_response_no_stream())
# print(result)
#
# # return this generator to the client for streaming requests
#
# async def get_response():
#     # Consumes the module-level `generator` (uncomment the generator line above first).
#     global generator
#     async for elem in generator:
#         print(elem)
#
# asyncio.run(get_response())
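#
# The note above about returning the generator to a client for streaming requests could
# be realized, for example, by wrapping it in a FastAPI StreamingResponse. This is a
# hypothetical sketch; FastAPI is an assumption and is not used anywhere in this file:
#
# from fastapi import FastAPI
# from fastapi.responses import StreamingResponse
#
# app = FastAPI()
#
# @app.get("/stream")
# async def stream_ollama(prompt: str = "Why is the sky blue?"):
#     async def event_stream():
#         # Serialize each yielded chunk as newline-delimited JSON.
#         async for chunk in get_ollama_response_stream(prompt=prompt):
#             yield json.dumps(chunk) + "\n"
#     return StreamingResponse(event_stream(), media_type="application/x-ndjson")
#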
##### Latest implementation: raw HTTP POST requests to the local Ollama server #####
# import requests
# import json
#
# def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"):
#     # Synchronous version: stream newline-delimited JSON from /api/generate with requests.
#     url = f"{api_base}/api/generate"
#     data = {
#         "model": model,
#         "prompt": prompt,
#     }
#     session = requests.Session()
#
#     with session.post(url, json=data, stream=True) as resp:
#         for line in resp.iter_lines():
#             if line:
#                 try:
#                     json_chunk = line.decode("utf-8")
#                     chunks = json_chunk.split("\n")
#                     for chunk in chunks:
#                         if chunk.strip() != "":
#                             j = json.loads(chunk)
#                             if "response" in j:
#                                 completion_obj = {
#                                     "role": "assistant",
#                                     "content": j["response"],
#                                 }
#                                 # Mimic the OpenAI streaming chunk shape: {"choices": [{"delta": ...}]}
#                                 yield {"choices": [{"delta": completion_obj}]}
#                 except Exception as e:
#                     print(f"Error decoding JSON: {e}")
#     session.close()
#
# response = get_ollama_response_stream()
#
# for chunk in response:
#     print(chunk['choices'][0]['delta'])
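#
# A small usage sketch (not in the original): collect the streamed deltas into one
# string, mirroring the no-stream helper from the async section above:
#
# full_response = ""
# for chunk in get_ollama_response_stream():
#     full_response += chunk["choices"][0]["delta"]["content"]
# print(full_response)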