From 65e6b05f5ba9e09f9cefcca27e0b150220530935 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Sat, 12 Aug 2023 12:05:51 -0700 Subject: [PATCH 1/7] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 955d6a62d4..164cbb927a 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingfac - exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance) # usage Demo - https://litellm.ai/ \ -Read the docs - https://litellm.readthedocs.io/en/latest/ +Read the docs - https://docs.litellm.ai/docs/ ## quick start ``` From 9d644a5634ef8c3b68cb1ae5323b6cb4a76133e1 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 12 Aug 2023 14:49:49 -0700 Subject: [PATCH 2/7] ollama with streaming --- litellm/main.py | 10 +++++ litellm/tests/test_ollama.py | 62 ++++++++++++++++++++++++++++++ litellm/tests/test_ollama_local.py | 52 +++++++++++++++++++++++++ litellm/utils.py | 44 ++++++++++++++++++++- 4 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 litellm/tests/test_ollama.py create mode 100644 litellm/tests/test_ollama_local.py diff --git a/litellm/main.py b/litellm/main.py index 4c3d75bf55..9bee0edd2f 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -9,6 +9,7 @@ import tiktoken from concurrent.futures import ThreadPoolExecutor encoding = tiktoken.get_encoding("cl100k_base") from litellm.utils import get_secret, install_and_import, CustomStreamWrapper, read_config_args +from litellm.utils import get_ollama_response_stream, stream_to_string ####### ENVIRONMENT VARIABLES ################### dotenv.load_dotenv() # Loading env variables using dotenv new_response = { @@ -426,6 +427,15 @@ def completion( model_response["created"] = time.time() model_response["model"] = model response = model_response + elif custom_llm_provider == "ollama": + endpoint = litellm.api_base if litellm.api_base is not None else custom_api_base + prompt = " ".join([message["content"] for message in messages]) + + ## LOGGING + logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + generator = get_ollama_response_stream(endpoint, model, prompt) + # assume all responses are streamed + return generator else: ## LOGGING logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) diff --git a/litellm/tests/test_ollama.py b/litellm/tests/test_ollama.py new file mode 100644 index 0000000000..d954145604 --- /dev/null +++ b/litellm/tests/test_ollama.py @@ -0,0 +1,62 @@ +###### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING ###### +# import aiohttp +# import json +# import asyncio +# import requests + +# async def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"): +# session = aiohttp.ClientSession() +# url = f'{api_base}/api/generate' +# data = { +# "model": model, +# "prompt": prompt, +# } + +# response = "" + +# try: +# async with session.post(url, json=data) as resp: +# async for line in resp.content.iter_any(): +# if line: +# try: +# json_chunk = line.decode("utf-8") +# chunks = json_chunk.split("\n") +# for chunk in chunks: +# if chunk.strip() != "": +# j = json.loads(chunk) +# if "response" in j: +# print(j["response"]) +# yield { +# "role": "assistant", +# "content": j["response"] +# } +# # self.responses.append(j["response"]) +# # yield "blank" +# except 
Exception as e: +# print(f"Error decoding JSON: {e}") +# finally: +# await session.close() + +# # async def get_ollama_response_no_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"): +# # generator = get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?") +# # response = "" +# # async for elem in generator: +# # print(elem) +# # response += elem["content"] +# # return response + +# # #generator = get_ollama_response_stream() + +# # result = asyncio.run(get_ollama_response_no_stream()) +# # print(result) + +# # # return this generator to the client for streaming requests + + + +# # async def get_response(): +# # global generator +# # async for elem in generator: +# # print(elem) + +# # asyncio.run(get_response()) diff --git a/litellm/tests/test_ollama_local.py b/litellm/tests/test_ollama_local.py new file mode 100644 index 0000000000..22544f4cfc --- /dev/null +++ b/litellm/tests/test_ollama_local.py @@ -0,0 +1,52 @@ +###### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING ###### + +# import sys, os +# import traceback +# from dotenv import load_dotenv +# load_dotenv() +# import os +# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path +# import pytest +# import litellm +# from litellm import embedding, completion +# import asyncio + + + +# user_message = "respond in 20 words. who are you?" +# messages = [{ "content": user_message,"role": "user"}] + +# async def get_response(generator): +# response = "" +# async for elem in generator: +# print(elem) +# response += elem["content"] +# return response + +# def test_completion_ollama(): +# try: +# response = completion(model="llama2", messages=messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama") +# print(response) +# string_response = asyncio.run(get_response(response)) +# print(string_response) +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + + +# # test_completion_ollama() + +# def test_completion_ollama_stream(): +# try: +# response = completion(model="llama2", messages=messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True) +# print(response) +# string_response = asyncio.run(get_response(response)) +# print(string_response) +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") + +# test_completion_ollama_stream() + + + + + diff --git a/litellm/utils.py b/litellm/utils.py index 65cd96a8e7..2f8372e519 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -743,4 +743,46 @@ def read_config_args(config_path): return config except Exception as e: print("An error occurred while reading config:", str(e)) - raise e \ No newline at end of file + raise e + + +########## ollama implementation ############################ +import aiohttp +async def get_ollama_response_stream(api_base="http://localhost:11434", model="llama2", prompt="Why is the sky blue?"): + session = aiohttp.ClientSession() + url = f'{api_base}/api/generate' + data = { + "model": model, + "prompt": prompt, + } + try: + async with session.post(url, json=data) as resp: + async for line in resp.content.iter_any(): + if line: + try: + json_chunk = line.decode("utf-8") + chunks = json_chunk.split("\n") + for chunk in chunks: + if chunk.strip() != "": + j = json.loads(chunk) + if "response" in j: + print(j["response"]) + yield { + "role": "assistant", + "content": j["response"] + } + # self.responses.append(j["response"]) + # yield "blank" + except 
Exception as e: + print(f"Error decoding JSON: {e}") + finally: + await session.close() + + +async def stream_to_string(generator): + response = "" + async for chunk in generator: + response += chunk["content"] + return response + + From fcecb30d29ba395d4c0ba626990b416feb2bd194 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 12 Aug 2023 14:54:28 -0700 Subject: [PATCH 3/7] bump v for ollama --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d0094c1074..207e96b5e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.383" +version = "0.1.384" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From 406290b9e4102ced97c54d879370576182751fc2 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 12 Aug 2023 15:05:11 -0700 Subject: [PATCH 4/7] fix yield + version number --- cookbook/liteLLM_Ollama.ipynb | 135 ++++++++++++++++++++++++++++++++++ litellm/utils.py | 8 +- pyproject.toml | 2 +- 3 files changed, 139 insertions(+), 6 deletions(-) create mode 100644 cookbook/liteLLM_Ollama.ipynb diff --git a/cookbook/liteLLM_Ollama.ipynb b/cookbook/liteLLM_Ollama.ipynb new file mode 100644 index 0000000000..3de54d8ef0 --- /dev/null +++ b/cookbook/liteLLM_Ollama.ipynb @@ -0,0 +1,135 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Collecting litellm==0.1.384\n", + " Downloading litellm-0.1.384-py3-none-any.whl (43 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.1/43.1 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: openai<0.28.0,>=0.27.8 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from litellm==0.1.384) (0.27.8)\n", + "Requirement already satisfied: python-dotenv<2.0.0,>=1.0.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from litellm==0.1.384) (1.0.0)\n", + "Requirement already satisfied: tiktoken<0.5.0,>=0.4.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from litellm==0.1.384) (0.4.0)\n", + "Requirement already satisfied: requests>=2.20 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.384) (2.28.2)\n", + "Requirement already satisfied: tqdm in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.384) (4.65.0)\n", + "Requirement already satisfied: aiohttp in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.384) (3.8.4)\n", + "Requirement already satisfied: regex>=2022.1.18 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from tiktoken<0.5.0,>=0.4.0->litellm==0.1.384) (2023.6.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.384) (3.1.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.384) (3.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in 
/Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.384) (1.26.6)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.384) (2023.5.7)\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (4.0.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (1.3.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (1.3.1)\n", + "Installing collected packages: litellm\n", + " Attempting uninstall: litellm\n", + " Found existing installation: litellm 0.1.379\n", + " Uninstalling litellm-0.1.379:\n", + " Successfully uninstalled litellm-0.1.379\n", + "Successfully installed litellm-0.1.384\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install litellm==0.1.384" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from litellm import completion\n", + "import asyncio" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "user_message = \"respond in 20 words. 
who are you?\"\n", + "messages = [{ \"content\": user_message,\"role\": \"user\"}]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "async def get_response(generator):\n", + " response = \"\"\n", + " async for elem in generator:\n", + " print(elem)\n", + " response += elem[\"content\"]\n", + " return response" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "response = completion(model=\"llama2\", messages=messages, custom_api_base=\"http://localhost:11434\", custom_llm_provider=\"ollama\", stream=True)\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "string_response = asyncio.run(get_response(response))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/litellm/utils.py b/litellm/utils.py index 2f8372e519..37928c9c4d 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -766,11 +766,9 @@ async def get_ollama_response_stream(api_base="http://localhost:11434", model="l if chunk.strip() != "": j = json.loads(chunk) if "response" in j: - print(j["response"]) - yield { - "role": "assistant", - "content": j["response"] - } + completion_obj ={ "role": "assistant", "content": ""} + completion_obj["content"] = j["response"] + yield {"choices": [{"delta": completion_obj}]} # self.responses.append(j["response"]) # yield "blank" except Exception as e: diff --git a/pyproject.toml b/pyproject.toml index 207e96b5e9..735003dc7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.384" +version = "0.1.385" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From fe159b576dee483be548dcd756fa39a0f3b6302e Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 12 Aug 2023 15:11:33 -0700 Subject: [PATCH 5/7] with streaming nb --- cookbook/liteLLM_Ollama.ipynb | 162 +++++++++++++++++++++------------- 1 file changed, 103 insertions(+), 59 deletions(-) diff --git a/cookbook/liteLLM_Ollama.ipynb b/cookbook/liteLLM_Ollama.ipynb index 3de54d8ef0..c826490c4f 100644 --- a/cookbook/liteLLM_Ollama.ipynb +++ b/cookbook/liteLLM_Ollama.ipynb @@ -2,53 +2,16 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Collecting litellm==0.1.384\n", - " Downloading litellm-0.1.384-py3-none-any.whl (43 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.1/43.1 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: openai<0.28.0,>=0.27.8 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from litellm==0.1.384) (0.27.8)\n", - "Requirement already satisfied: 
python-dotenv<2.0.0,>=1.0.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from litellm==0.1.384) (1.0.0)\n", - "Requirement already satisfied: tiktoken<0.5.0,>=0.4.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from litellm==0.1.384) (0.4.0)\n", - "Requirement already satisfied: requests>=2.20 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.384) (2.28.2)\n", - "Requirement already satisfied: tqdm in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.384) (4.65.0)\n", - "Requirement already satisfied: aiohttp in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.384) (3.8.4)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from tiktoken<0.5.0,>=0.4.0->litellm==0.1.384) (2023.6.3)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.384) (3.1.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.384) (3.4)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.384) (1.26.6)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.384) (2023.5.7)\n", - "Requirement already satisfied: attrs>=17.3.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (23.1.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (4.0.2)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (1.3.3)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.384) (1.3.1)\n", - "Installing collected packages: litellm\n", - " Attempting uninstall: litellm\n", - " Found existing installation: litellm 0.1.379\n", - " Uninstalling litellm-0.1.379:\n", - " Successfully uninstalled litellm-0.1.379\n", - "Successfully installed litellm-0.1.384\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: 
\u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ - "!pip install litellm==0.1.384" + "!pip install litellm==0.1.385" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -56,42 +19,42 @@ "import asyncio" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Setup Messages" + ] + }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ - "\n", "user_message = \"respond in 20 words. who are you?\"\n", "messages = [{ \"content\": user_message,\"role\": \"user\"}]" ] }, { - "cell_type": "code", - "execution_count": 7, + "attachments": {}, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "\n", - "async def get_response(generator):\n", - " response = \"\"\n", - " async for elem in generator:\n", - " print(elem)\n", - " response += elem[\"content\"]\n", - " return response" + "### Call Ollama - llama2 with chatGPT Input/Output using litellm.completion() " ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\n" + "\n" ] } ], @@ -101,12 +64,93 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "attachments": {}, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "string_response = asyncio.run(get_response(response))" + "## Iterate through the generator - Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " I\n", + "{'role': 'assistant', 'content': ' I'}\n", + " am\n", + "{'role': 'assistant', 'content': ' am'}\n", + " L\n", + "{'role': 'assistant', 'content': ' L'}\n", + "La\n", + "{'role': 'assistant', 'content': 'La'}\n", + "MA\n", + "{'role': 'assistant', 'content': 'MA'}\n", + ",\n", + "{'role': 'assistant', 'content': ','}\n", + " an\n", + "{'role': 'assistant', 'content': ' an'}\n", + " A\n", + "{'role': 'assistant', 'content': ' A'}\n", + "I\n", + "{'role': 'assistant', 'content': 'I'}\n", + " assistant\n", + "{'role': 'assistant', 'content': ' assistant'}\n", + " developed\n", + "{'role': 'assistant', 'content': ' developed'}\n", + " by\n", + "{'role': 'assistant', 'content': ' by'}\n", + " Meta\n", + "{'role': 'assistant', 'content': ' Meta'}\n", + " A\n", + "{'role': 'assistant', 'content': ' A'}\n", + "I\n", + "{'role': 'assistant', 'content': 'I'}\n", + " that\n", + "{'role': 'assistant', 'content': ' that'}\n", + " can\n", + "{'role': 'assistant', 'content': ' can'}\n", + " understand\n", + "{'role': 'assistant', 'content': ' understand'}\n", + " and\n", + "{'role': 'assistant', 'content': ' and'}\n", + " respond\n", + "{'role': 'assistant', 'content': ' respond'}\n", + " to\n", + "{'role': 'assistant', 'content': ' to'}\n", + " human\n", + "{'role': 'assistant', 'content': ' human'}\n", + " input\n", + "{'role': 'assistant', 'content': ' input'}\n", + " in\n", + "{'role': 'assistant', 'content': ' in'}\n", + " a\n", + "{'role': 'assistant', 'content': ' a'}\n", + " convers\n", + "{'role': 'assistant', 'content': ' convers'}\n", + "ational\n", + "{'role': 'assistant', 'content': 'ational'}\n", + " manner\n", + "{'role': 'assistant', 'content': ' manner'}\n", + ".\n", + "{'role': 'assistant', 'content': '.'}\n" + ] + } + ], + 
"source": [ + "\n", + "async def get_response(generator):\n", + " response = \"\"\n", + " async for elem in generator:\n", + " print(elem)\n", + " response += elem[\"content\"]\n", + " return response\n", + "\n", + "string_response = await get_response(response)" ] } ], From 154e4e911230616d702fd57f94d5ee66991f13d2 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 12 Aug 2023 15:15:03 -0700 Subject: [PATCH 6/7] new nb --- cookbook/liteLLM_Ollama.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cookbook/liteLLM_Ollama.ipynb b/cookbook/liteLLM_Ollama.ipynb index c826490c4f..3aab935f9d 100644 --- a/cookbook/liteLLM_Ollama.ipynb +++ b/cookbook/liteLLM_Ollama.ipynb @@ -147,7 +147,7 @@ " response = \"\"\n", " async for elem in generator:\n", " print(elem)\n", - " response += elem[\"content\"]\n", + " response += elem['choices'][0]['delta'][\"content\"]\n", " return response\n", "\n", "string_response = await get_response(response)" From 96fe2d77577aad88310bb3f906c643be6d45d261 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Sat, 12 Aug 2023 16:04:45 -0700 Subject: [PATCH 7/7] openai cookbook --- cookbook/liteLLM_OpenAI.ipynb | 349 ++++++++++++++++++++++++++++++++ cookbook/proxy-server/.DS_Store | Bin 0 -> 6148 bytes 2 files changed, 349 insertions(+) create mode 100644 cookbook/liteLLM_OpenAI.ipynb create mode 100644 cookbook/proxy-server/.DS_Store diff --git a/cookbook/liteLLM_OpenAI.ipynb b/cookbook/liteLLM_OpenAI.ipynb new file mode 100644 index 0000000000..2842d6e7af --- /dev/null +++ b/cookbook/liteLLM_OpenAI.ipynb @@ -0,0 +1,349 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "MZ01up0p7wOJ" + }, + "source": [ + "## 🚅 liteLLM Demo\n", + "### TLDR: Call 50+ LLM APIs using chatGPT Input/Output format\n", + "https://github.com/BerriAI/litellm\n", + "\n", + "liteLLM is package to simplify calling **OpenAI, Azure, Llama2, Cohere, Anthropic, Huggingface API Endpoints**. 
LiteLLM manages\n", + "\n", + "* Translating inputs to the provider's `completion()` and `embedding()` endpoints\n", + "* Guarantees consistent output, text responses will always be available at `['choices'][0]['message']['content']`\n", + "* Exception mapping - common exceptions across providers are mapped to the OpenAI exception types\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "RZtzCnQS7rW-" + }, + "source": [ + "## Installation and setting Params" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rsrN5W-N7L8d" + }, + "outputs": [], + "source": [ + "!pip install litellm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "ArrWyG5b7QAG" + }, + "outputs": [], + "source": [ + "from litellm import completion\n", + "import os" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "bbhJRt34_NJ1" + }, + "source": [ + "## Set your API keys\n", + "- liteLLM reads your .env, env variables or key manager for Auth" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "id": "-h8Ga5cR7SvV" + }, + "outputs": [], + "source": [ + "os.environ['OPENAI_API_KEY'] = \"\" #@param\n", + "os.environ[\"ANTHROPIC_API_KEY\"] = \"\" #@param\n", + "os.environ[\"AZURE_API_BASE\"] = \"\" #@param\n", + "os.environ[\"AZURE_API_VERSION\"] = \"\" #@param\n", + "os.environ[\"AZURE_API_KEY\"] = \"\" #@param\n", + "os.environ[\"REPLICATE_API_TOKEN\"] = \"\" #@param\n", + "os.environ[\"COHERE_API_KEY\"] = \"\" #@param\n", + "os.environ[\"HF_TOKEN\"] = \"\" #@param" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "MBujGiby8YBu" + }, + "outputs": [], + "source": [ + "messages = [{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "fhqpKv6L8fBj" + }, + "source": [ + "## Call chatGPT" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "speIkoX_8db4", + "outputId": "bc804d62-1d33-4198-b6d7-b721961694a3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + " JSON: {\n", + " \"id\": \"chatcmpl-7mrklZEq2zK3Z5pSkOR3Jn54gpN5A\",\n", + " \"object\": \"chat.completion\",\n", + " \"created\": 1691880727,\n", + " \"model\": \"gpt-3.5-turbo-0613\",\n", + " \"choices\": [\n", + " {\n", + " \"index\": 0,\n", + " \"message\": {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"I'm sorry, but as an AI language model, I don't have real-time data. 
However, you can check the current weather in San Francisco by using a weather website or app, or by searching \\\"weather in San Francisco\\\" on a search engine.\"\n", + " },\n", + " \"finish_reason\": \"stop\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 13,\n", + " \"completion_tokens\": 52,\n", + " \"total_tokens\": 65\n", + " }\n", + "}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "completion(model=\"gpt-3.5-turbo\", messages=messages)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Q3jV1Uxv8zNo" + }, + "source": [ + "## Call Claude-2" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "V8yTWYzY8m9S", + "outputId": "8b6dd32d-f9bf-4e89-886d-47cb8020f025" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'choices': [{'finish_reason': 'stop',\n", + " 'index': 0,\n", + " 'message': {'role': 'assistant',\n", + " 'content': \" Unfortunately I do not have enough context to provide the current weather in San Francisco. To get the most accurate weather report, it's helpful if I know details like:\\n\\n- Exact location (city name, zip code, etc)\\n- Time frame (current conditions, forecast for a certain day/week, etc)\\n\\nIf you can provide some more specifics about what weather information you need for San Francisco, I'd be happy to look that up for you!\"}}],\n", + " 'created': 1691880836.974166,\n", + " 'model': 'claude-2',\n", + " 'usage': {'prompt_tokens': 18, 'completion_tokens': 95, 'total_tokens': 113}}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "completion(model=\"claude-2\", messages=messages)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "yu0LPDmW9PJa" + }, + "source": [ + "## Call llama2 on replicate" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0GWV5mtO9Jbu", + "outputId": "38538825-b271-406d-a437-f5cf0eb7e548" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'choices': [{'finish_reason': 'stop',\n", + " 'index': 0,\n", + " 'message': {'role': 'assistant',\n", + " 'content': ' I\\'m happy to help! However, I must point out that the question \"what\\'s the weather in SF\" doesn\\'t make sense as \"SF\" could refer to multiple locations (San Francisco, South Florida, San Fernando, etc.). Could you please provide more context or specify which location you\\'re referring to? 
That way, I can give you an accurate answer.'}}],\n", + " 'created': 1691880930.9003325,\n", + " 'model': 'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1',\n", + " 'usage': {'prompt_tokens': 6, 'completion_tokens': 74, 'total_tokens': 80}}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = \"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\"\n", + "completion(model=model, messages=messages)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "HXdj5SEe9iLK" + }, + "source": [ + "## Call Command-Nightly" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EaUq2xIx9fhr", + "outputId": "55fe6f52-b58b-4729-948a-74dac4b431b2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'choices': [{'finish_reason': 'stop',\n", + " 'index': 0,\n", + " 'message': {'role': 'assistant',\n", + " 'content': ' The weather in San Francisco can be quite unpredictable. The city is known for its fog, which can'}}],\n", + " 'created': 1691880972.5565543,\n", + " 'model': 'command-nightly',\n", + " 'usage': {'prompt_tokens': 6, 'completion_tokens': 20, 'total_tokens': 26}}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "completion(model=\"command-nightly\", messages=messages)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "1g9hSgsL9soJ" + }, + "source": [ + "## Call Azure OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AvLjR-PF-lt0", + "outputId": "deff2db3-b003-48cd-ea62-c03a68a4464a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + " JSON: {\n", + " \"id\": \"chatcmpl-7mrtwvpx3okijXmbt9PEYdPMeE7lH\",\n", + " \"object\": \"chat.completion\",\n", + " \"created\": 1691881296,\n", + " \"model\": \"gpt-35-turbo\",\n", + " \"choices\": [\n", + " {\n", + " \"index\": 0,\n", + " \"finish_reason\": \"stop\",\n", + " \"message\": {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"I'm sorry, as an AI language model, I don't have real-time data. 
However, you can check the weather forecast for San Francisco on websites such as AccuWeather or Weather Channel.\"\n", + " }\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"completion_tokens\": 40,\n", + " \"prompt_tokens\": 14,\n", + " \"total_tokens\": 54\n", + " }\n", + "}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "completion(deployment_id=\"chatgpt-test\", messages=messages, azure=True)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/cookbook/proxy-server/.DS_Store b/cookbook/proxy-server/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..739982f14229afc89c48bd69d594f0c863de8df5 GIT binary patch literal 6148 zcmeHKOHRWu5PdF{Nf*zpsiwbE2{YdRDd+xv$I3Fu`V_Q{| zlr2Iq6OEreo@dL?Q5*v>&GqOK2ml<@1uJzH2Tc0KOIGm_;Tb!|7z<1=MS^iDTLZsQ z0ol6~w2)$mC+^yA|2j!J93<(0{qk}&=+UP&)_@wW(ZLnkxJ4K3LiT$hJK!5J#fsx6 zd}ioj&1Z@cB1Sh@<@x5Kz^{?hXa1b|&)62SUklj*vr~>Ovn@H7@Ft8zcw{@pyukk` z@awqax*AmFh8S_QQJztGC@~hCtn>3hWe+??Xrzj6G%! z?Wcoo+rxY;NR=3sUlKk0PTAZA
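
Usage note (not part of the patches above): taken together, these commits add an Ollama streaming path to `litellm.completion()`. The call returns an async generator whose chunks, after PATCH 4/7, follow the OpenAI streaming delta format (`chunk['choices'][0]['delta']['content']`). The sketch below summarizes the intended usage shown in `test_ollama_local.py` and the `liteLLM_Ollama.ipynb` cookbook; it assumes an Ollama server running locally at `http://localhost:11434` and litellm >= 0.1.385 with the `custom_api_base` / `custom_llm_provider` arguments introduced in this series.

```python
# Minimal sketch of the Ollama streaming path added in this patch series.
# Assumes a local Ollama server (http://localhost:11434) serving the llama2 model
# and litellm >= 0.1.385, where completion() accepts custom_api_base and
# custom_llm_provider as shown in the diffs above.
import asyncio

from litellm import completion

messages = [{"role": "user", "content": "respond in 20 words. who are you?"}]


async def collect(generator):
    # Chunks follow the OpenAI streaming delta format after PATCH 4/7:
    # {"choices": [{"delta": {"role": "assistant", "content": "..."}}]}
    response = ""
    async for chunk in generator:
        response += chunk["choices"][0]["delta"]["content"]
    return response


if __name__ == "__main__":
    # For the "ollama" provider the patch returns an async generator directly,
    # so the streamed pieces are consumed with an event loop.
    generator = completion(
        model="llama2",
        messages=messages,
        custom_api_base="http://localhost:11434",
        custom_llm_provider="ollama",
        stream=True,
    )
    print(asyncio.run(collect(generator)))
```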