From 03a43e158a3b15452432a41bbc526581b461e881 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 10:37:41 -0700 Subject: [PATCH 01/34] add import manager - make package lighter --- litellm/integrations/helicone.py | 2 +- litellm/main.py | 17 ++++++++++++----- litellm/tests/test_completion.py | 18 ++++++++++-------- litellm/utils.py | 16 +++++++++++++++- pyproject.toml | 9 ++++----- requirements.txt | 5 +---- 6 files changed, 43 insertions(+), 24 deletions(-) diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py index 6b3d619659..9e74b246f9 100644 --- a/litellm/integrations/helicone.py +++ b/litellm/integrations/helicone.py @@ -2,7 +2,6 @@ # On success, logs events to Helicone import dotenv, os import requests -from anthropic import HUMAN_PROMPT, AI_PROMPT dotenv.load_dotenv() # Loading env variables using dotenv import traceback class HeliconeLogger: @@ -14,6 +13,7 @@ class HeliconeLogger: self.key = os.getenv('HELICONE_API_KEY') def claude_mapping(self, model, messages, response_obj): + from anthropic import HUMAN_PROMPT, AI_PROMPT prompt = f"{HUMAN_PROMPT}" for message in messages: if "role" in message: diff --git a/litellm/main.py b/litellm/main.py index 7803de2a96..ea4d43a638 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1,6 +1,5 @@ -import os, openai, cohere, replicate, sys +import os, openai, sys from typing import Any -from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT from functools import partial import dotenv, traceback, random, asyncio, time from copy import deepcopy @@ -13,7 +12,7 @@ from tenacity import ( stop_after_attempt, wait_random_exponential, ) # for exponential backoff -from litellm.utils import get_secret +from litellm.utils import get_secret, install_and_import ####### ENVIRONMENT VARIABLES ################### dotenv.load_dotenv() # Loading env variables using dotenv @@ -28,9 +27,7 @@ new_response = { } ] } -# TODO move this to utils.py # TODO add translations -# TODO see if this worked - model_name == krrish ####### COMPLETION ENDPOINTS ################ ############################################# async def acompletion(*args, **kwargs): @@ -68,6 +65,7 @@ def completion( openai.api_type = "azure" openai.api_base = litellm.api_base if litellm.api_base is not None else get_secret("AZURE_API_BASE") openai.api_version = litellm.api_version if litellm.api_version is not None else get_secret("AZURE_API_VERSION") + # set key if api_key: openai.api_key = api_key elif litellm.azure_key: @@ -92,6 +90,7 @@ def completion( ) elif model in litellm.open_ai_chat_completion_models: openai.api_type = "openai" + # note: if a user sets a custom base - we should ensure this works openai.api_base = litellm.api_base if litellm.api_base is not None else "https://api.openai.com/v1" openai.api_version = None if litellm.organization: @@ -155,6 +154,8 @@ def completion( model_response["usage"] = response["usage"] response = model_response elif "replicate" in model: + # import replicate/if it fails then pip install replicate + install_and_import("replicate") # replicate defaults to os.environ.get("REPLICATE_API_TOKEN") # checking in case user set it to REPLICATE_API_KEY instead if not get_secret("REPLICATE_API_TOKEN") and get_secret("REPLICATE_API_KEY"): @@ -194,6 +195,10 @@ def completion( } response = model_response elif model in litellm.anthropic_models: + # import anthropic/if it fails then pip install anthropic + install_and_import("anthropic") + from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT + #anthropic defaults to 
os.environ.get("ANTHROPIC_API_KEY") if api_key: os.environ["ANTHROPIC_API_KEY"] = api_key @@ -239,6 +244,8 @@ def completion( } response = model_response elif model in litellm.cohere_models: + # import cohere/if it fails then pip install cohere + install_and_import("cohere") if api_key: cohere_key = api_key elif litellm.cohere_key: diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index a4c151e5bc..925483f32f 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -7,8 +7,10 @@ sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the import pytest import litellm from litellm import embedding, completion +from infisical import InfisicalClient # litellm.set_verbose = True +litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"]) user_message = "Hello, whats the weather in San Francisco??" messages = [{ "content": user_message,"role": "user"}] @@ -16,6 +18,14 @@ messages = [{ "content": user_message,"role": "user"}] def logger_fn(user_model_dict): print(f"user_model_dict: {user_model_dict}") +def test_completion_claude(): + try: + response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) + # Add any assertions here to check the response + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_completion_openai(): try: response = completion(model="gpt-3.5-turbo", messages=messages) @@ -84,14 +94,6 @@ def test_completion_azure(): except Exception as e: pytest.fail(f"Error occurred: {e}") -def test_completion_claude(): - try: - response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) - # Add any assertions here to check the response - print(response) - except Exception as e: - pytest.fail(f"Error occurred: {e}") - def test_completion_cohere(): try: response = completion(model="command-nightly", messages=messages, max_tokens=500) diff --git a/litellm/utils.py b/litellm/utils.py index d32b946892..5b48201314 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4,7 +4,6 @@ import subprocess, os import litellm, openai import random, uuid, requests import datetime, time -from anthropic import Anthropic import tiktoken encoding = tiktoken.get_encoding("cl100k_base") from .integrations.helicone import HeliconeLogger @@ -34,6 +33,19 @@ def print_verbose(print_statement): if random.random() <= 0.3: print("Get help - https://discord.com/invite/wuPM9dRgDw") +####### Package Import Handler ################### +import importlib +import subprocess +def install_and_import(package): + try: + importlib.import_module(package) + except ImportError: + print(f"{package} is not installed. 
Installing...") + subprocess.call([sys.executable, '-m', 'pip', 'install', package]) + finally: + globals()[package] = importlib.import_module(package) +################################################## + ####### LOGGING ################### #Logging function -> log the exact model details + what's being sent | Non-Blocking def logging(model=None, input=None, azure=False, additional_args={}, logger_fn=None, exception=None): @@ -329,6 +341,8 @@ def prompt_token_calculator(model, messages): text = " ".join(message["content"] for message in messages) num_tokens = 0 if "claude" in model: + install_and_import('anthropic') + from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT anthropic = Anthropic() num_tokens = anthropic.count_tokens(text) else: diff --git a/pyproject.toml b/pyproject.toml index 9953d8fe4c..20916e7823 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,12 +8,11 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.8" -openai = {extras = ["datalib"], version = "^0.27.8"} -cohere = "^4.18.0" +openai = "^0.27.8" + pytest = "^7.4.0" -pydantic = "^2.1.1" -anthropic = "^0.3.7" -replicate = "^0.10.0" + + python-dotenv = "^1.0.0" tenacity = "^8.0.1" tiktoken = "^0.4.0" diff --git a/requirements.txt b/requirements.txt index 87aa1e7376..ba5e487e38 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,5 @@ -pydantic +# used by CI/CD testing openai -cohere -anthropic -replicate pytest python-dotenv openai[datalib] From 8e863120feb97214119184cf141c5bb24d9fbce9 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 10:46:20 -0700 Subject: [PATCH 02/34] add import for co, anth --- .DS_Store | Bin 6148 -> 6148 bytes litellm/main.py | 2 ++ litellm/tests/test_completion.py | 16 +++++++++------- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.DS_Store b/.DS_Store index 17cee6a0acfa4d370d7d9b919baa2fb3a72b4c2f..22073074832468582ce431f5bcd4c2f80be08229 100644 GIT binary patch delta 53 zcmZoMXfc@JFUrBdz`)4BAi&_6lb@WFlb;0S3v3qTSkBDIu{nT6m~rz`)+DBh4Q-p* IIsWnk075|xYybcN delta 182 zcmZoMXfc@JFUrNhz`)4BAi%(o!;s2Q!jQ?3&ycrSkYhQsD@aO+AsHx;%TRzU%@7Y% zn95KLB=vwYm7Y2I$w@i+NkH{La~ReGvEF|$U|`tH!y?RRF9=kg%a8;lQ-Cm&A&(&) Uzafm>D2B+erZR12=lIJH09xKHMgRZ+ diff --git a/litellm/main.py b/litellm/main.py index ea4d43a638..33f7676d39 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -156,6 +156,7 @@ def completion( elif "replicate" in model: # import replicate/if it fails then pip install replicate install_and_import("replicate") + import replicate # replicate defaults to os.environ.get("REPLICATE_API_TOKEN") # checking in case user set it to REPLICATE_API_KEY instead if not get_secret("REPLICATE_API_TOKEN") and get_secret("REPLICATE_API_KEY"): @@ -246,6 +247,7 @@ def completion( elif model in litellm.cohere_models: # import cohere/if it fails then pip install cohere install_and_import("cohere") + import cohere if api_key: cohere_key = api_key elif litellm.cohere_key: diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 925483f32f..52c9373cc6 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -26,6 +26,14 @@ def test_completion_claude(): except Exception as e: pytest.fail(f"Error occurred: {e}") +def test_completion_cohere(): + try: + response = completion(model="command-nightly", messages=messages, max_tokens=500) + # Add any assertions here to check the response + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_completion_openai(): try: response = 
completion(model="gpt-3.5-turbo", messages=messages) @@ -94,13 +102,7 @@ def test_completion_azure(): except Exception as e: pytest.fail(f"Error occurred: {e}") -def test_completion_cohere(): - try: - response = completion(model="command-nightly", messages=messages, max_tokens=500) - # Add any assertions here to check the response - print(response) - except Exception as e: - pytest.fail(f"Error occurred: {e}") + # Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect. # [TODO] improve our try-except block to handle for these From 3332cc2065ef9773e5a31f2c306f9b186c3983d1 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 10:58:43 -0700 Subject: [PATCH 03/34] remove deps datalib, pytest, tenacity, infisical --- .circleci/config.yml | 2 ++ litellm/__init__.py | 2 +- litellm/main.py | 5 ----- litellm/tests/test_embedding.py | 20 ++++++++++++++++++++ pyproject.toml | 4 ---- requirements.txt | 7 ++----- 6 files changed, 25 insertions(+), 15 deletions(-) create mode 100644 litellm/tests/test_embedding.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 20260abceb..edefc4b7e0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,6 +13,8 @@ jobs: command: | python -m pip install --upgrade pip python -m pip install -r requirements.txt + pip install infisical + pip install pytest # Run pytest and generate JUnit XML report - run: diff --git a/litellm/__init__.py b/litellm/__init__.py index 937b22c14b..aaa36737ef 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -4,7 +4,7 @@ failure_callback = [] set_verbose=False telemetry=True max_tokens = 256 # OpenAI Defaults -retry = True # control tenacity retries. +retry = True openai_key = None azure_key = None anthropic_key = None diff --git a/litellm/main.py b/litellm/main.py index 33f7676d39..299376458f 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -7,11 +7,6 @@ import litellm from litellm import client, logging, exception_type, timeout, get_optional_params import tiktoken encoding = tiktoken.get_encoding("cl100k_base") -from tenacity import ( - retry, - stop_after_attempt, - wait_random_exponential, -) # for exponential backoff from litellm.utils import get_secret, install_and_import ####### ENVIRONMENT VARIABLES ################### dotenv.load_dotenv() # Loading env variables using dotenv diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py new file mode 100644 index 0000000000..be2b30a81b --- /dev/null +++ b/litellm/tests/test_embedding.py @@ -0,0 +1,20 @@ + +import sys, os +import traceback +import pytest + +sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path +import litellm +from litellm import embedding, completion +from infisical import InfisicalClient + +# litellm.set_verbose = True +litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"]) + +def test_openai_embedding(): + try: + response = embedding(model='text-embedding-ada-002', input=["good morning from litellm"]) + # Add any assertions here to check the response + print(f"response: {str(response)}") + except Exception as e: + pytest.fail(f"Error occurred: {e}") diff --git a/pyproject.toml b/pyproject.toml index 20916e7823..d75e1762d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,12 +9,8 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.8" openai = "^0.27.8" - pytest = "^7.4.0" - - python-dotenv = "^1.0.0" -tenacity = "^8.0.1" tiktoken = 
"^0.4.0" [build-system] diff --git a/requirements.txt b/requirements.txt index ba5e487e38..56f796b35c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,5 @@ # used by CI/CD testing openai -pytest python-dotenv -openai[datalib] -tenacity -tiktoken -infisical \ No newline at end of file +openai +tiktoken \ No newline at end of file From 33547c761148b8e5cbf77b673d4b5909e17e4cbe Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 11:32:31 -0700 Subject: [PATCH 04/34] fix circle ci test --- .circleci/config.yml | 1 + litellm/tests/test_embedding.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index edefc4b7e0..397031de7d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -15,6 +15,7 @@ jobs: python -m pip install -r requirements.txt pip install infisical pip install pytest + pip install openai[datalib] # Run pytest and generate JUnit XML report - run: diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index be2b30a81b..ce83ffc70a 100644 --- a/litellm/tests/test_embedding.py +++ b/litellm/tests/test_embedding.py @@ -17,4 +17,4 @@ def test_openai_embedding(): # Add any assertions here to check the response print(f"response: {str(response)}") except Exception as e: - pytest.fail(f"Error occurred: {e}") + pytest.fail(f"Error occurred: {e}") \ No newline at end of file From 79af0ea052f6ae0ec0725eea1f567c7d499bd8fe Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 11:34:43 -0700 Subject: [PATCH 05/34] remove pytest as a package dep --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d75e1762d5..92434afdf3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,6 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.8" openai = "^0.27.8" -pytest = "^7.4.0" python-dotenv = "^1.0.0" tiktoken = "^0.4.0" From d72dc244b134da3cf5df1f2e6dc9486ce15e5427 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 11:35:20 -0700 Subject: [PATCH 06/34] new version toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 92434afdf3..f53cd4d49a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.356" +version = "0.1.360" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From 5c17f90173bab07c4591b5cf83fab9f46308bf7b Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 13:35:37 -0700 Subject: [PATCH 07/34] add hf support --- litellm/__init__.py | 2 ++ litellm/main.py | 29 ++++++++++++++++++++++++++++- litellm/tests/test_completion.py | 10 ++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index aaa36737ef..01559e3d18 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -10,6 +10,8 @@ azure_key = None anthropic_key = None replicate_key = None cohere_key = None + +hugging_api_token = None ####### THREAD-SPECIFIC DATA ################### class MyLocal(threading.local): def __init__(self): diff --git a/litellm/main.py b/litellm/main.py index 299376458f..4fc3bcb05e 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -44,7 +44,8 @@ def completion( temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'), presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None, # Optional liteLLM 
function params - *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False + *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False, + hugging_face = False ): try: global new_response @@ -273,6 +274,32 @@ def completion( "total_tokens": prompt_tokens + completion_tokens } response = model_response + elif hugging_face == True: + import requests + API_URL = f"https://api-inference.huggingface.co/models/{model}" + HF_TOKEN = get_secret("HF_TOKEN") + headers = {"Authorization": f"Bearer {HF_TOKEN}"} + + prompt = " ".join([message["content"] for message in messages]) + ## LOGGING + logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + input_payload = {"inputs": prompt} + response = requests.post(API_URL, headers=headers, json=input_payload) + + completion_response = response.json()[0]['generated_text'] + ## LOGGING + logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) + prompt_tokens = len(encoding.encode(prompt)) + completion_tokens = len(encoding.encode(completion_response)) + ## RESPONSE OBJECT + model_response["choices"][0]["message"]["content"] = completion_response + model_response["created"] = time.time() + model_response["model"] = model + model_response["usage"] = { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens + } else: ## LOGGING logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 52c9373cc6..e001daa615 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -26,6 +26,16 @@ def test_completion_claude(): except Exception as e: pytest.fail(f"Error occurred: {e}") +def test_completion_hf_api(): + try: + user_message = "write some code to find the sum of two numbers" + messages = [{ "content": user_message,"role": "user"}] + response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True) + # Add any assertions here to check the response + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_completion_cohere(): try: response = completion(model="command-nightly", messages=messages, max_tokens=500) From 2a216fbb3edc1351603572aee9f8e604afb5503c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 8 Aug 2023 13:37:32 -0700 Subject: [PATCH 08/34] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ddbe4ecbf..bcb0608f6d 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/) [![CircleCI](https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main) ![Downloads](https://img.shields.io/pypi/dm/litellm) -[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere-blue?color=green)](https://github.com/BerriAI/litellm) 
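The Hugging Face branch added in [PATCH 07/34] above reduces to a single Inference API request. A minimal standalone sketch of that flow, assuming the `HF_TOKEN` environment variable and the `google/flan-t5-xxl` repo used in the docs and notebook (any hosted text-generation model can be substituted):

```python
import os
import requests

# Sketch of the request the hugging_face=True branch performs.
# Model repo and HF_TOKEN env var mirror the docs/tests above; both are
# illustrative choices, not the only supported values.
API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-xxl"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

messages = [{"role": "user", "content": "Hello, whats the weather in San Francisco??"}]
prompt = " ".join(m["content"] for m in messages)  # completion() flattens messages the same way

resp = requests.post(API_URL, headers=headers, json={"inputs": prompt})
resp.raise_for_status()
print(resp.json()[0]["generated_text"])  # first generation returned by the Inference API
```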
+[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm) Get Support / Join the community 👉 [![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) From 03a330517d849dba0976a5339b729658e0037c43 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 13:45:47 -0700 Subject: [PATCH 09/34] new version toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f53cd4d49a..d260539972 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.360" +version = "0.1.361" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From b4bc5edde06696c974c00c3b58fbf4380ff10d18 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 13:55:27 -0700 Subject: [PATCH 10/34] add docs --- docs/supported.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/supported.md b/docs/supported.md index 692a55e7dc..0eb2ec3d15 100644 --- a/docs/supported.md +++ b/docs/supported.md @@ -36,3 +36,16 @@ | claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` | | claude-v2 | `completion('claude-v2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | +### Hugging Face Inference API +All `text2text-generation`, `text-generation` are supported by liteLLM +In order to use models on hugging face inference: +* copy the `model repo` from hugging face and set it as the `model` parameter in the completion call. +* set `hugging_face` to `True` + +| Model Name | Function Call | Required OS Variables | +|------------------|--------------------------------------------|--------------------------------------| +| stabilityai/stablecode-completion-alpha-3b-4k | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True) | `os.environ['HF_TOKEN']` | +| google/flan-t5-xxl | `completion(model="google/flan-t5-xxl", messages=messages, hugging_face=True) | `os.environ['HF_TOKEN']` | + + + From a94dc9369f2727c4433b9043aed8da7873ba262f Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 13:58:58 -0700 Subject: [PATCH 11/34] fix model response --- litellm/main.py | 1 + pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/main.py b/litellm/main.py index 4fc3bcb05e..8f7873099c 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -300,6 +300,7 @@ def completion( "completion_tokens": completion_tokens, "total_tokens": prompt_tokens + completion_tokens } + response = model_response else: ## LOGGING logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) diff --git a/pyproject.toml b/pyproject.toml index d260539972..20bc61f467 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.361" +version = "0.1.362" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From 318c0cbada3c7e096283c22b670c30630d98fa1e Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 14:21:11 -0700 Subject: [PATCH 12/34] add example on using HF models --- .../community-resources}/max_tokens.json | 0 cookbook/liteLLM_Hugging_Face_Example.ipynb | 153 ++++++++++++++++++ 2 files changed, 153 insertions(+) rename {community_resources 
=> cookbook/community-resources}/max_tokens.json (100%) create mode 100644 cookbook/liteLLM_Hugging_Face_Example.ipynb diff --git a/community_resources/max_tokens.json b/cookbook/community-resources/max_tokens.json similarity index 100% rename from community_resources/max_tokens.json rename to cookbook/community-resources/max_tokens.json diff --git a/cookbook/liteLLM_Hugging_Face_Example.ipynb b/cookbook/liteLLM_Hugging_Face_Example.ipynb new file mode 100644 index 0000000000..9c64e2df25 --- /dev/null +++ b/cookbook/liteLLM_Hugging_Face_Example.ipynb @@ -0,0 +1,153 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## Install liteLLM https://github.com/BerriAI/litellm\n", + "liteLLM provides one interface to call gpt 3.5, hugging face inference endpoints" + ], + "metadata": { + "id": "IGQZtR61AZSd" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "x_4jcmmXcdm-", + "outputId": "c89e7817-561d-4867-904b-aa1634565cbb" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: litellm==0.1.362 in /usr/local/lib/python3.10/dist-packages (0.1.362)\n", + "Requirement already satisfied: openai<0.28.0,>=0.27.8 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.362) (0.27.8)\n", + "Requirement already satisfied: python-dotenv<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.362) (1.0.0)\n", + "Requirement already satisfied: tiktoken<0.5.0,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.362) (0.4.0)\n", + "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.362) (2.28.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.362) (4.65.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.362) (3.8.5)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<0.5.0,>=0.4.0->litellm==0.1.362) (2022.10.31)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.362) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.362) (3.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.362) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.362) (2023.7.22)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (4.0.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (1.3.1)\n" + ] + } + ], + "source": [ + "!pip install litellm==\"0.1.362\"" + ] + }, + { + "cell_type": "code", + "source": [ + "from litellm import completion\n", + "import os\n", + "user_message = \"Hello, whats the weather in San Francisco??\"\n", + "messages = [{ \"content\": user_message,\"role\": \"user\"}]\n", + "\n", + "os.environ['HF_TOKEN'] = \"\"#@param\n", + "# get your hugging face token from here:\n", + "# https://huggingface.co/settings/tokens\n", + "\n", + "# Optional if you want to run OpenAI TOO\n", + "os.environ['OPENAI_API_KEY'] = \"\" #@param\n", + "\n", + "response = completion(\"stabilityai/stablecode-completion-alpha-3b-4k\", messages=messages, hugging_face=True)\n", + "print(\"Response from stabilityai/stablecode-completion-alpha-3b-4k\")\n", + "print(response['choices'][0]['message']['content'])\n", + "print(\"\\n\\n\")\n", + "\n", + "response = completion(\"bigcode/starcoder\", messages=messages, hugging_face=True)\n", + "print(\"Response from bigcode/starcoder\")\n", + "print(response['choices'][0]['message']['content'])\n", + "print(\"\\n\\n\")\n", + "\n", + "response = completion(\"google/flan-t5-xxl\", messages=messages, hugging_face=True)\n", + "print(\"Response from google/flan-t5-xxl\")\n", + "print(response['choices'][0]['message']['content'])\n", + "print(\"\\n\\n\")\n", + "\n", + "response = completion(\"google/flan-t5-large\", messages=messages, hugging_face=True)\n", + "print(\"Response from google/flan-t5-large\")\n", + "print(response['choices'][0]['message']['content'])\n", + "print(\"\\n\\n\")\n", + "\n", + "response = completion(model=\"gpt-3.5-turbo\", messages=messages)\n", + "print(response['choices'][0]['message']['content'])\n", + "print(response)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vC54VW3jvLnN", + "outputId": "e6616221-12c9-4313-dd03-fd94fa095e8e" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Response from stabilityai/stablecode-completion-alpha-3b-4k\n", + "Hello, whats the weather in San Francisco??\",\n", + " \"id\": 1,\n", + " \"\n", + "\n", + "\n", + "\n", + "Response from bigcode/starcoder\n", + "Hello, whats the weather in San Francisco??\")\n", + "\n", + "# print(response)\n", + "\n", + "# print(response.text)\n", + "\n", + "#\n", + "\n", + "\n", + "\n", + "Response from google/flan-t5-xxl\n", + "a little cold\n", + "\n", + "\n", + "\n", + "Response from google/flan-t5-large\n", + "cool\n", + "\n", + "\n", + "\n", + "I'm sorry, but I am an AI language model and do not have real-time data. 
However, you can check the weather in San Francisco by searching for \"San Francisco weather\" on a search engine or checking a reliable weather website or app.\n" + ] + } + ] + } + ] +} \ No newline at end of file From 225efba211898fd404ea944ae207e944344ac689 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 14:29:07 -0700 Subject: [PATCH 13/34] update docs --- docs/supported.md | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/docs/supported.md b/docs/supported.md index 0eb2ec3d15..7d3cff87e3 100644 --- a/docs/supported.md +++ b/docs/supported.md @@ -37,15 +37,22 @@ | claude-v2 | `completion('claude-v2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | ### Hugging Face Inference API -All `text2text-generation`, `text-generation` are supported by liteLLM -In order to use models on hugging face inference: -* copy the `model repo` from hugging face and set it as the `model` parameter in the completion call. -* set `hugging_face` to `True` -| Model Name | Function Call | Required OS Variables | -|------------------|--------------------------------------------|--------------------------------------| -| stabilityai/stablecode-completion-alpha-3b-4k | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True) | `os.environ['HF_TOKEN']` | -| google/flan-t5-xxl | `completion(model="google/flan-t5-xxl", messages=messages, hugging_face=True) | `os.environ['HF_TOKEN']` | +All [`text2text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text2text-generation&sort=downloads) and [`text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text-generation&sort=downloads) models are supported by liteLLM. You can use any text model from Hugging Face with the following steps: +1. Copy the `model repo` URL from Hugging Face and set it as the `model` parameter in the completion call. +2. Set `hugging_face` parameter to `True`. +3. 
Make sure to set the hugging face API key + +Here are some examples of supported models: +**Note that the models mentioned in the table are examples, and you can use any text model available on Hugging Face by following the steps above.** + +| Model Name | Function Call | Required OS Variables | +|------------------|-------------------------------------------------------------------------------------|--------------------------------------| +| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | +| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | +| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | +| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | + From 6c72b0a12a624f807717fd614228445feef860ee Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 14:44:37 -0700 Subject: [PATCH 14/34] udpate docs --- docs/supported.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/supported.md b/docs/supported.md index 7d3cff87e3..78fbd09507 100644 --- a/docs/supported.md +++ b/docs/supported.md @@ -39,9 +39,10 @@ ### Hugging Face Inference API All [`text2text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text2text-generation&sort=downloads) and [`text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text-generation&sort=downloads) models are supported by liteLLM. You can use any text model from Hugging Face with the following steps: -1. Copy the `model repo` URL from Hugging Face and set it as the `model` parameter in the completion call. -2. Set `hugging_face` parameter to `True`. -3. Make sure to set the hugging face API key + +* Copy the `model repo` URL from Hugging Face and set it as the `model` parameter in the completion call. +* Set `hugging_face` parameter to `True`. +* Make sure to set the hugging face API key Here are some examples of supported models: **Note that the models mentioned in the table are examples, and you can use any text model available on Hugging Face by following the steps above.** From 1292038148cf9c20bda96895b0c57efac5a77d1f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 8 Aug 2023 14:49:42 -0700 Subject: [PATCH 15/34] Update README.md --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bcb0608f6d..8df5f3dde3 100644 --- a/README.md +++ b/README.md @@ -25,11 +25,6 @@ pip install litellm ```python from litellm import completion -## set ENV variables -# ENV variables can be set in .env file, too. 
Example in .env.example -os.environ["OPENAI_API_KEY"] = "openai key" -os.environ["COHERE_API_KEY"] = "cohere key" - messages = [{ "content": "Hello, how are you?","role": "user"}] # openai call @@ -41,6 +36,9 @@ response = completion("command-nightly", messages) # azure openai call response = completion("chatgpt-test", messages, azure=True) +# hugging face call +response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True) + # openrouter call response = completion("google/palm-2-codechat-bison", messages) ``` From e4f96075c3254ca5657f91c1ecf9ab08d7f806f1 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 15:18:48 -0700 Subject: [PATCH 16/34] fix docs claude2 --- docs/supported.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/supported.md b/docs/supported.md index 78fbd09507..713e7313fe 100644 --- a/docs/supported.md +++ b/docs/supported.md @@ -34,7 +34,7 @@ | Model Name | Function Call | Required OS Variables | |------------------|--------------------------------------------|--------------------------------------| | claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` | -| claude-v2 | `completion('claude-v2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | +| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | ### Hugging Face Inference API From 36a6ac9b08fab73f475f198cfa8698ac3f4ceb6a Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 15:57:24 -0700 Subject: [PATCH 17/34] streaming for anthropic --- litellm/main.py | 24 +++++++++++- litellm/tests/test_completion.py | 13 +++++++ litellm/utils.py | 67 +++++++++++++++++++------------- 3 files changed, 75 insertions(+), 29 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 8f7873099c..8d8c78e256 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -11,6 +11,19 @@ from litellm.utils import get_secret, install_and_import ####### ENVIRONMENT VARIABLES ################### dotenv.load_dotenv() # Loading env variables using dotenv +# TODO this will evolve to accepting models +# replicate/anthropic/cohere +class CustomStreamWrapper: + def __init__(self, completion_stream): + self.completion_stream = completion_stream + + def __iter__(self): + return self + + def __next__(self): + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk.completion}]} + new_response = { "choices": [ { @@ -54,7 +67,8 @@ def completion( optional_params = get_optional_params( functions=functions, function_call=function_call, temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens, - presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id + presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id, + model=model ) if azure == True: # azure configs @@ -222,8 +236,14 @@ def completion( completion = anthropic.completions.create( model=model, prompt=prompt, - max_tokens_to_sample=max_tokens_to_sample + max_tokens_to_sample=max_tokens_to_sample, + **optional_params ) + if optional_params['stream'] == True: + # don't try to access stream object, + response = CustomStreamWrapper(completion) + return response + completion_response = completion.completion ## LOGGING logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": 
completion_response}, logger_fn=logger_fn) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index e001daa615..35f7b631d7 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -26,6 +26,19 @@ def test_completion_claude(): except Exception as e: pytest.fail(f"Error occurred: {e}") +def test_completion_claude_stream(): + try: + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "how does a court case get to the Supreme Court?"} + ] + response = completion(model="claude-2", messages=messages, stream=True) + # Add any assertions here to check the response + for chunk in response: + print(chunk['choices'][0]['delta']) # same as openai format + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_completion_hf_api(): try: user_message = "write some code to find the sum of two numbers" diff --git a/litellm/utils.py b/litellm/utils.py index 5b48201314..599c61e246 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -146,36 +146,49 @@ def get_optional_params( frequency_penalty = 0, logit_bias = {}, user = "", - deployment_id = None + deployment_id = None, + model = None, ): optional_params = {} - if functions != []: - optional_params["functions"] = functions - if function_call != "": - optional_params["function_call"] = function_call - if temperature != 1: - optional_params["temperature"] = temperature - if top_p != 1: - optional_params["top_p"] = top_p - if n != 1: - optional_params["n"] = n - if stream: + if model in litellm.anthropic_models: + # handle anthropic params + if stream: optional_params["stream"] = stream - if stop != None: - optional_params["stop"] = stop - if max_tokens != float('inf'): - optional_params["max_tokens"] = max_tokens - if presence_penalty != 0: - optional_params["presence_penalty"] = presence_penalty - if frequency_penalty != 0: - optional_params["frequency_penalty"] = frequency_penalty - if logit_bias != {}: - optional_params["logit_bias"] = logit_bias - if user != "": - optional_params["user"] = user - if deployment_id != None: - optional_params["deployment_id"] = deployment_id - return optional_params + if stop != None: + optional_params["stop_sequences"] = stop + if temperature != 1: + optional_params["temperature"] = temperature + if top_p != 1: + optional_params["top_p"] = top_p + return optional_params + else:# assume passing in params for openai/azure openai + if functions != []: + optional_params["functions"] = functions + if function_call != "": + optional_params["function_call"] = function_call + if temperature != 1: + optional_params["temperature"] = temperature + if top_p != 1: + optional_params["top_p"] = top_p + if n != 1: + optional_params["n"] = n + if stream: + optional_params["stream"] = stream + if stop != None: + optional_params["stop"] = stop + if max_tokens != float('inf'): + optional_params["max_tokens"] = max_tokens + if presence_penalty != 0: + optional_params["presence_penalty"] = presence_penalty + if frequency_penalty != 0: + optional_params["frequency_penalty"] = frequency_penalty + if logit_bias != {}: + optional_params["logit_bias"] = logit_bias + if user != "": + optional_params["user"] = user + if deployment_id != None: + optional_params["deployment_id"] = deployment_id + return optional_params def set_callbacks(callback_list): global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient From 
5040d08f79edaf1cfbeac1e92d26ed3cdaa5918b Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 16:07:53 -0700 Subject: [PATCH 18/34] fix anthropic streaming --- litellm/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/main.py b/litellm/main.py index 8d8c78e256..0eac877247 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -239,7 +239,7 @@ def completion( max_tokens_to_sample=max_tokens_to_sample, **optional_params ) - if optional_params['stream'] == True: + if 'stream' in optional_params and optional_params['stream'] == True: # don't try to access stream object, response = CustomStreamWrapper(completion) return response From 276dae803e3ebcc176b9869c73b7c7cf22978ed5 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 16:10:52 -0700 Subject: [PATCH 19/34] anthropic streaming --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 20bc61f467..8515d1e8c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.362" +version = "0.1.363" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From d6f8cfa3d8895bf1ab60dd6577ac82f2c51e1e79 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 8 Aug 2023 16:19:34 -0700 Subject: [PATCH 20/34] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 8df5f3dde3..5c671dd056 100644 --- a/README.md +++ b/README.md @@ -51,10 +51,15 @@ pip install litellm==0.1.345 ## Streaming Queries liteLLM supports streaming the model response back, pass `stream=True` to get a streaming iterator in response. +Streaming is supported for OpenAI, Azure, Anthropic models ```python response = completion(model="gpt-3.5-turbo", messages=messages, stream=True) for chunk in response: print(chunk['choices'][0]['delta']) +# claude 2 +result = litellm.completion('claude-2', messages, stream=True) +for chunk in result: + print(chunk['choices'][0]['delta']) ``` # hosted version From 654e8480d38156d93a8dd03f5976d2a10c7d33ac Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 16:20:49 -0700 Subject: [PATCH 21/34] example with Claude+streaming --- ...pic)_with_Streaming_liteLLM_Examples.ipynb | 406 ++++++++++++++++++ 1 file changed, 406 insertions(+) create mode 100644 cookbook/Claude_(Anthropic)_with_Streaming_liteLLM_Examples.ipynb diff --git a/cookbook/Claude_(Anthropic)_with_Streaming_liteLLM_Examples.ipynb b/cookbook/Claude_(Anthropic)_with_Streaming_liteLLM_Examples.ipynb new file mode 100644 index 0000000000..f3875ae608 --- /dev/null +++ b/cookbook/Claude_(Anthropic)_with_Streaming_liteLLM_Examples.ipynb @@ -0,0 +1,406 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZwuaylskLxFu", + "outputId": "d684d6a3-32fe-4beb-c378-c39134bcf8cc" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting litellm==0.1.363\n", + " Downloading litellm-0.1.363-py3-none-any.whl (34 kB)\n", + "Requirement already satisfied: openai<0.28.0,>=0.27.8 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (0.27.8)\n", + "Requirement 
already satisfied: python-dotenv<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (1.0.0)\n", + "Requirement already satisfied: tiktoken<0.5.0,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (0.4.0)\n", + "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (2.31.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (4.65.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.8.5)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<0.5.0,>=0.4.0->litellm==0.1.363) (2022.10.31)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (2023.7.22)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (4.0.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.3.1)\n", + "Installing collected packages: litellm\n", + " Attempting uninstall: litellm\n", + " Found existing installation: litellm 0.1.362\n", + " Uninstalling litellm-0.1.362:\n", + " Successfully uninstalled litellm-0.1.362\n", + "Successfully installed litellm-0.1.363\n" + ] + } + ], + "source": [ + "!pip install litellm==\"0.1.363\"" + ] + }, + { + "cell_type": "code", + "source": [ + "# @title Import litellm & Set env variables\n", + "import litellm\n", + "import os\n", + "\n", + "os.environ[\"ANTHROPIC_API_KEY\"] = \" \" #@param" + ], + "metadata": { + "id": "W216G__XL19Q" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Request Claude Instant-1 and Claude-2\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"}\n", + " ]\n", + "\n", + "result = litellm.completion('claude-instant-1', messages)\n", + "print(\"\\n\\n 
Result from claude-instant-1\", result)\n", + "result = litellm.completion('claude-2', messages, max_tokens=5, temperature=0.2)\n", + "print(\"\\n\\n Result from claude-2\", result)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ff1lKwUMMLJj", + "outputId": "bfddf6f8-36d4-45e5-92dc-349083fa41b8" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + " Result from claude-instant-1 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \" The Los Angeles Dodgers won the 2020 World Series, defeating the Tampa Bay Rays 4-2. It was the Dodgers' first World Series title since 1988.\"}}], 'created': 1691536677.2676156, 'model': 'claude-instant-1', 'usage': {'prompt_tokens': 30, 'completion_tokens': 32, 'total_tokens': 62}}\n", + "\n", + "\n", + " Result from claude-2 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': ' The Los Angeles Dodgers won'}}], 'created': 1691536677.944753, 'model': 'claude-2', 'usage': {'prompt_tokens': 30, 'completion_tokens': 5, 'total_tokens': 35}}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title Streaming Example: Request Claude-2\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\"role\": \"user\", \"content\": \"how does a court case get to the Supreme Court?\"}\n", + " ]\n", + "\n", + "result = litellm.completion('claude-2', messages, stream=True)\n", + "for chunk in result:\n", + " print(chunk['choices'][0]['delta'])\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "06hWKnNQMrV-", + "outputId": "7fdec0eb-d4a9-4882-f9c4-987ff9a31114" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Here\n", + "'s\n", + " a\n", + " quick\n", + " overview\n", + " of\n", + " how\n", + " a\n", + " court\n", + " case\n", + " can\n", + " reach\n", + " the\n", + " U\n", + ".\n", + "S\n", + ".\n", + " Supreme\n", + " Court\n", + ":\n", + "\n", + "\n", + "-\n", + " The\n", + " case\n", + " must\n", + " first\n", + " be\n", + " heard\n", + " in\n", + " a\n", + " lower\n", + " trial\n", + " court\n", + " (\n", + "either\n", + " a\n", + " state\n", + " court\n", + " or\n", + " federal\n", + " district\n", + " court\n", + ").\n", + " The\n", + " trial\n", + " court\n", + " makes\n", + " initial\n", + " r\n", + "ulings\n", + " and\n", + " produces\n", + " a\n", + " record\n", + " of\n", + " the\n", + " case\n", + ".\n", + "\n", + "\n", + "-\n", + " The\n", + " losing\n", + " party\n", + " can\n", + " appeal\n", + " the\n", + " decision\n", + " to\n", + " an\n", + " appeals\n", + " court\n", + " (\n", + "a\n", + " state\n", + " appeals\n", + " court\n", + " for\n", + " state\n", + " cases\n", + ",\n", + " or\n", + " a\n", + " federal\n", + " circuit\n", + " court\n", + " for\n", + " federal\n", + " cases\n", + ").\n", + " The\n", + " appeals\n", + " court\n", + " reviews\n", + " the\n", + " trial\n", + " court\n", + "'s\n", + " r\n", + "ulings\n", + " and\n", + " can\n", + " affirm\n", + ",\n", + " reverse\n", + ",\n", + " or\n", + " modify\n", + " the\n", + " decision\n", + ".\n", + "\n", + "\n", + "-\n", + " If\n", + " a\n", + " party\n", + " is\n", + " still\n", + " unsat\n", + "isf\n", + "ied\n", + " after\n", + " the\n", + " appeals\n", + " court\n", + " rules\n", + ",\n", + " they\n", + " can\n", + " 
petition\n", + " the\n", + " Supreme\n", + " Court\n", + " to\n", + " hear\n", + " the\n", + " case\n", + " through\n", + " a\n", + " writ\n", + " of\n", + " cert\n", + "ior\n", + "ari\n", + ".\n", + " \n", + "\n", + "\n", + "-\n", + " The\n", + " Supreme\n", + " Court\n", + " gets\n", + " thousands\n", + " of\n", + " cert\n", + " petitions\n", + " every\n", + " year\n", + " but\n", + " usually\n", + " only\n", + " agrees\n", + " to\n", + " hear\n", + " about\n", + " 100\n", + "-\n", + "150\n", + " of\n", + " cases\n", + " that\n", + " have\n", + " significant\n", + " national\n", + " importance\n", + " or\n", + " where\n", + " lower\n", + " courts\n", + " disagree\n", + " on\n", + " federal\n", + " law\n", + ".\n", + " \n", + "\n", + "\n", + "-\n", + " If\n", + " 4\n", + " out\n", + " of\n", + " the\n", + " 9\n", + " Just\n", + "ices\n", + " vote\n", + " to\n", + " grant\n", + " cert\n", + " (\n", + "agree\n", + " to\n", + " hear\n", + " the\n", + " case\n", + "),\n", + " it\n", + " goes\n", + " on\n", + " the\n", + " Supreme\n", + " Court\n", + "'s\n", + " do\n", + "cket\n", + " for\n", + " arguments\n", + ".\n", + "\n", + "\n", + "-\n", + " The\n", + " Supreme\n", + " Court\n", + " then\n", + " hears\n", + " oral\n", + " arguments\n", + ",\n", + " considers\n", + " written\n", + " brief\n", + "s\n", + ",\n", + " examines\n", + " the\n", + " lower\n", + " court\n", + " records\n", + ",\n", + " and\n", + " issues\n", + " a\n", + " final\n", + " ruling\n", + " on\n", + " the\n", + " case\n", + ",\n", + " which\n", + " serves\n", + " as\n", + " binding\n", + " precedent\n" + ] + } + ] + } + ] +} \ No newline at end of file From 72911a6bd01b0a978b70bedd51e6f60883a4babe Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 8 Aug 2023 16:22:18 -0700 Subject: [PATCH 22/34] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5c671dd056..36fc85b654 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ Streaming is supported for OpenAI, Azure, Anthropic models response = completion(model="gpt-3.5-turbo", messages=messages, stream=True) for chunk in response: print(chunk['choices'][0]['delta']) + # claude 2 result = litellm.completion('claude-2', messages, stream=True) for chunk in result: From ac460dd616c7c9e10ffb248eaf92be00dc2246de Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 8 Aug 2023 16:23:59 -0700 Subject: [PATCH 23/34] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 36fc85b654..b8884dd1e4 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ for chunk in response: print(chunk['choices'][0]['delta']) # claude 2 -result = litellm.completion('claude-2', messages, stream=True) +result = completion('claude-2', messages, stream=True) for chunk in result: print(chunk['choices'][0]['delta']) ``` From 613df8942f42d43503bcf45f3e3b73804c018b8a Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 17:01:58 -0700 Subject: [PATCH 24/34] add cohere streaming --- litellm/main.py | 27 +++++++++++++++++++++------ litellm/tests/test_completion.py | 14 ++++++++++++++ litellm/utils.py | 10 ++++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 0eac877247..17144a47f0 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -14,15 +14,24 @@ dotenv.load_dotenv() # Loading env variables using dotenv # TODO this will evolve to accepting models # replicate/anthropic/cohere class CustomStreamWrapper: - def 
__init__(self, completion_stream): - self.completion_stream = completion_stream + def __init__(self, completion_stream, model): + self.model = model + if model in litellm.cohere_models: + # cohere does not return an iterator, so we need to wrap it in one + self.completion_stream = iter(completion_stream) + else: + self.completion_stream = completion_stream def __iter__(self): return self def __next__(self): - chunk = next(self.completion_stream) - return {"choices": [{"delta": chunk.completion}]} + if self.model in litellm.anthropic_models: + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk.completion}]} + elif self.model in litellm.cohere_models: + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk.text}]} new_response = { "choices": [ @@ -241,7 +250,7 @@ def completion( ) if 'stream' in optional_params and optional_params['stream'] == True: # don't try to access stream object, - response = CustomStreamWrapper(completion) + response = CustomStreamWrapper(completion, model) return response completion_response = completion.completion @@ -277,8 +286,14 @@ def completion( ## COMPLETION CALL response = co.generate( model=model, - prompt = prompt + prompt = prompt, + **optional_params ) + if 'stream' in optional_params and optional_params['stream'] == True: + # don't try to access stream object, + response = CustomStreamWrapper(response, model) + return response + completion_response = response[0].text ## LOGGING logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 35f7b631d7..d5733e2fb4 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -57,6 +57,20 @@ def test_completion_cohere(): except Exception as e: pytest.fail(f"Error occurred: {e}") + +def test_completion_cohere_stream(): + try: + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "how does a court case get to the Supreme Court?"} + ] + response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50) + # Add any assertions here to check the response + for chunk in response: + print(chunk['choices'][0]['delta']) # same as openai format + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_completion_openai(): try: response = completion(model="gpt-3.5-turbo", messages=messages) diff --git a/litellm/utils.py b/litellm/utils.py index 599c61e246..04e92737a5 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -161,6 +161,16 @@ def get_optional_params( if top_p != 1: optional_params["top_p"] = top_p return optional_params + elif model in litellm.cohere_models: + # handle cohere params + if stream: + optional_params["stream"] = stream + if temperature != 1: + optional_params["temperature"] = temperature + if max_tokens != float('inf'): + optional_params["max_tokens"] = max_tokens + return optional_params + else:# assume passing in params for openai/azure openai if functions != []: optional_params["functions"] = functions From d87ae075747cdf54aaf97823aa295e9ec7d8a465 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 17:02:34 -0700 Subject: [PATCH 25/34] with cohere streaming --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8515d1e8c8..c8de403b30 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.363" +version = "0.1.364" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From f4048886abe867583e1c65c501a610b45fdc6acb Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 17:50:36 -0700 Subject: [PATCH 26/34] streaming replicate tests --- litellm/main.py | 37 +++++++----------------- litellm/tests/test_completion.py | 48 +++++++++++++++++++++----------- litellm/utils.py | 37 +++++++++++++++++++++++- 3 files changed, 78 insertions(+), 44 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 17144a47f0..b4a70709bf 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -7,32 +7,9 @@ import litellm from litellm import client, logging, exception_type, timeout, get_optional_params import tiktoken encoding = tiktoken.get_encoding("cl100k_base") -from litellm.utils import get_secret, install_and_import +from litellm.utils import get_secret, install_and_import, CustomStreamWrapper ####### ENVIRONMENT VARIABLES ################### dotenv.load_dotenv() # Loading env variables using dotenv - -# TODO this will evolve to accepting models -# replicate/anthropic/cohere -class CustomStreamWrapper: - def __init__(self, completion_stream, model): - self.model = model - if model in litellm.cohere_models: - # cohere does not return an iterator, so we need to wrap it in one - self.completion_stream = iter(completion_stream) - else: - self.completion_stream = completion_stream - - def __iter__(self): - return self - - def __next__(self): - if self.model in litellm.anthropic_models: - chunk = next(self.completion_stream) - return {"choices": [{"delta": chunk.completion}]} - elif self.model in litellm.cohere_models: - chunk = next(self.completion_stream) - return {"choices": [{"delta": chunk.text}]} - new_response = { "choices": [ { @@ -67,7 +44,7 @@ def completion( presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None, # Optional liteLLM function params *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False, - hugging_face = False + hugging_face = False, replicate=False, ): try: global new_response @@ -77,7 +54,8 @@ def completion( functions=functions, function_call=function_call, temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens, presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id, - model=model + # params to identify the model + model=model, replicate=replicate, hugging_face=hugging_face ) if azure == True: # azure configs @@ -172,7 +150,7 @@ def completion( model_response["model"] = model model_response["usage"] = response["usage"] response = model_response - elif "replicate" in model: + elif "replicate" in model or replicate == True: # import replicate/if it fails then pip install replicate install_and_import("replicate") import replicate @@ -196,6 +174,11 @@ def completion( output = replicate.run( model, input=input) + if 'stream' in optional_params and optional_params['stream'] == True: + # don't try to access stream object, + # let the stream handler know this is replicate + response = CustomStreamWrapper(output, "replicate") + return response response = "" for item in output: response += item diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index d5733e2fb4..304eb0303e 100644 --- a/litellm/tests/test_completion.py 
+++ b/litellm/tests/test_completion.py @@ -139,20 +139,36 @@ def test_completion_azure(): except Exception as e: pytest.fail(f"Error occurred: {e}") - - # Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect. -# [TODO] improve our try-except block to handle for these -# def test_completion_replicate_llama(): -# model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" -# try: -# response = completion(model=model_name, messages=messages, max_tokens=500) -# # Add any assertions here to check the response -# print(response) -# except Exception as e: -# print(f"in replicate llama, got error {e}") -# pass -# if e == "FunctionTimedOut": -# pass -# else: -# pytest.fail(f"Error occurred: {e}") +def test_completion_replicate_llama_stream(): + model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" + try: + response = completion(model=model_name, messages=messages, stream=True) + # Add any assertions here to check the response + for result in response: + print(result) + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +def test_completion_replicate_stability_stream(): + model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb" + try: + response = completion(model=model_name, messages=messages, stream=True, replicate=True) + # Add any assertions here to check the response + for result in response: + print(result) + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +def test_completion_replicate_stability(): + model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb" + try: + response = completion(model=model_name, messages=messages, replicate=True) + # Add any assertions here to check the response + for result in response: + print(result) + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index 04e92737a5..c92440dce9 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -148,6 +148,8 @@ def get_optional_params( user = "", deployment_id = None, model = None, + replicate = False, + hugging_face = False, ): optional_params = {} if model in litellm.anthropic_models: @@ -170,7 +172,12 @@ def get_optional_params( if max_tokens != float('inf'): optional_params["max_tokens"] = max_tokens return optional_params - + elif replicate == True: + # any replicate models + # TODO: handle translating remaining replicate params + if stream: + optional_params["stream"] = stream + return optional_params else:# assume passing in params for openai/azure openai if functions != []: optional_params["functions"] = functions @@ -199,6 +206,7 @@ def get_optional_params( if deployment_id != None: optional_params["deployment_id"] = deployment_id return optional_params + return optional_params def set_callbacks(callback_list): global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient @@ -557,3 +565,30 @@ def get_secret(secret_name): return os.environ.get(secret_name) else: return os.environ.get(secret_name) + +######## Streaming Class ############################ +# wraps the completion stream to return the correct format for the model +# replicate/anthropic/cohere 
+class CustomStreamWrapper: + def __init__(self, completion_stream, model): + self.model = model + if model in litellm.cohere_models: + # cohere does not return an iterator, so we need to wrap it in one + self.completion_stream = iter(completion_stream) + else: + self.completion_stream = completion_stream + + def __iter__(self): + return self + + def __next__(self): + if self.model in litellm.anthropic_models: + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk.completion}]} + elif self.model == "replicate": + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk}]} + elif self.model in litellm.cohere_models: + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk.text}]} + From e28576c835dcb711d9e2008482149174fe6496f7 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 17:54:29 -0700 Subject: [PATCH 27/34] bump package version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c8de403b30..87d67d4fa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.364" +version = "0.1.365" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From 8e8ba0315b2a47bc8e5ee41b3163f7bbbb2e5825 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 8 Aug 2023 20:47:02 -0700 Subject: [PATCH 28/34] add helper functions for token usage calculation --- litellm/__init__.py | 19 +++++++++++++++- litellm/tests.txt | 1 - litellm/utils.py | 55 ++++++++++++++++++++++++++++++++------------- pyproject.toml | 2 +- 4 files changed, 59 insertions(+), 18 deletions(-) delete mode 100644 litellm/tests.txt diff --git a/litellm/__init__.py b/litellm/__init__.py index 01559e3d18..4c18d0e63c 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -10,8 +10,25 @@ azure_key = None anthropic_key = None replicate_key = None cohere_key = None - hugging_api_token = None + +model_cost = { + "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, + "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name + "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, + "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, + "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, + "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name + "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, + "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006}, + "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006}, + "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012}, + "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551}, + "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268}, + "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004}, +
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002}, + "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015}, +} ####### THREAD-SPECIFIC DATA ################### class MyLocal(threading.local): def __init__(self): diff --git a/litellm/tests.txt b/litellm/tests.txt deleted file mode 100644 index 4f67a836c5..0000000000 --- a/litellm/tests.txt +++ /dev/null @@ -1 +0,0 @@ -test 1 \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index c92440dce9..b47e082712 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -131,6 +131,46 @@ def client(original_function): raise e return wrapper +####### USAGE CALCULATOR ################ + +def prompt_token_calculator(model, messages): + # use tiktoken or anthropic's tokenizer depending on the model + text = " ".join(message["content"] for message in messages) + num_tokens = 0 + if "claude" in model: + install_and_import('anthropic') + from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT + anthropic = Anthropic() + num_tokens = anthropic.count_tokens(text) + else: + num_tokens = len(encoding.encode(text)) + return num_tokens + + +def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens = 0): + ## given + prompt_tokens_cost_usd_dollar = 0 + completion_tokens_cost_usd_dollar = 0 + model_cost_ref = litellm.model_cost + if model in model_cost_ref: + prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + else: + # calculate average input cost + input_cost_sum = 0 + output_cost_sum = 0 + model_cost_ref = litellm.model_cost + for model in model_cost_ref: + input_cost_sum += model_cost_ref[model]["input_cost_per_token"] + output_cost_sum += model_cost_ref[model]["output_cost_per_token"] + avg_input_cost = input_cost_sum / len(model_cost_ref.keys()) + avg_output_cost = output_cost_sum / len(model_cost_ref.keys()) + prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens + completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + + ####### HELPER FUNCTIONS ################ def get_optional_params( # 12 optional params @@ -367,21 +407,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k logging(logger_fn=user_logger_fn, exception=e) pass -def prompt_token_calculator(model, messages): - # use tiktoken or anthropic's tokenizer depending on the model - text = " ".join(message["content"] for message in messages) - num_tokens = 0 - if "claude" in model: - install_and_import('anthropic') - from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT - anthropic = Anthropic() - num_tokens = anthropic.count_tokens(text) - else: - num_tokens = len(encoding.encode(text)) - return num_tokens - - - def handle_success(args, kwargs, result, start_time, end_time): global heliconeLogger, aispendLogger try: diff --git a/pyproject.toml b/pyproject.toml index 87d67d4fa7..dc608b8411 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.365" +version = "0.1.366" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From ee6c45ca6a340930218b168befa8dbe38d8642bc Mon Sep 17 
00:00:00 2001 From: Krrish Dholakia Date: Tue, 8 Aug 2023 21:11:06 -0700 Subject: [PATCH 29/34] add token usage --- docs/token_usage.md | 45 +++++++++++++++++++++++++++++++++++++++++++++ litellm/__init__.py | 2 +- litellm/utils.py | 24 +++++++++++++++++++++--- mkdocs.yml | 2 ++ pyproject.toml | 2 +- 5 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 docs/token_usage.md diff --git a/docs/token_usage.md b/docs/token_usage.md new file mode 100644 index 0000000000..5bf2fbd3df --- /dev/null +++ b/docs/token_usage.md @@ -0,0 +1,45 @@ +# Token Usage +By default LiteLLM returns token usage in all completion requests ([See here](https://litellm.readthedocs.io/en/latest/output/)) + +However, we also expose 3 public helper functions to calculate token usage across providers: + +- `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available. + +- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json). + +- `completion_cost`: This returns the overall cost (in USD) for a given LLM API Call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both cost of input and output). + +## Example Usage + +1. `token_counter` + +```python +from litellm import token_counter + +messages = [{"role": "user", "content": "Hey, how's it going"}] +print(token_counter(model="gpt-3.5-turbo", text=messages[0]["content"])) +``` + +2. `cost_per_token` + +```python +from litellm import cost_per_token + +prompt_tokens = 5 +completion_tokens = 10 +prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model="gpt-3.5-turbo", prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) + +print(prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar) +``` + +3.
`completion_cost` + +```python +from litellm import completion_cost + +prompt = "Hey, how's it going" +completion = "Hi, I'm gpt - I am doing well" +cost_of_query = completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion) + +print(cost_of_query) +``` diff --git a/litellm/__init__.py b/litellm/__init__.py index 4c18d0e63c..9b0154dda7 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -89,7 +89,7 @@ open_ai_embedding_models = [ 'text-embedding-ada-002' ] from .timeout import timeout -from .utils import client, logging, exception_type, get_optional_params, modify_integration +from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost from .main import * # Import all the symbols from main.py from .integrations import * from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index b47e082712..b81e9bc0d5 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -133,9 +133,8 @@ def client(original_function): ####### USAGE CALCULATOR ################ -def prompt_token_calculator(model, messages): +def token_counter(model, text): # use tiktoken or anthropic's tokenizer depending on the model - text = " ".join(message["content"] for message in messages) num_tokens = 0 if "claude" in model: install_and_import('anthropic') @@ -168,9 +167,15 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens = avg_output_cost = output_cost_sum / len(model_cost_ref.keys()) prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar +def completion_cost(model="gpt-3.5-turbo", prompt="", completion=""): + prompt_tokens = token_counter(model=model, text=prompt) + completion_tokens = token_counter(model=model, text=completion) + prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model=model, prompt_tokens = prompt_tokens, completion_tokens = completion_tokens) + return prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar + ####### HELPER FUNCTIONS ################ def get_optional_params( # 12 optional params @@ -466,6 +471,19 @@ def handle_success(args, kwargs, result, start_time, end_time): print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}") pass +def prompt_token_calculator(model, messages): + # use tiktoken or anthropic's tokenizer depending on the model + text = " ".join(message["content"] for message in messages) + num_tokens = 0 + if "claude" in model: + install_and_import('anthropic') + from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT + anthropic = Anthropic() + num_tokens = anthropic.count_tokens(text) + else: + num_tokens = len(encoding.encode(text)) + return num_tokens + # integration helper function def modify_integration(integration_name, integration_params): global supabaseClient diff --git a/mkdocs.yml b/mkdocs.yml index e7326d0d67..97ed0d9ed8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,6 +6,8 @@ nav: - Input - Request Body: input.md - Output - Response Object: output.md - Streaming & Async Calls: stream.md + - token usage: + - Helper Functions: token_usage.md - 🤖 Supported LLM APIs: - Supported Completion & Chat APIs: supported.md -
Supported Embedding APIs: supported_embedding.md diff --git a/pyproject.toml b/pyproject.toml index dc608b8411..0600035ca2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.366" +version = "0.1.367" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From 95571d1f48a7a284998a38b58cba27ddc9252782 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 9 Aug 2023 06:04:32 -0700 Subject: [PATCH 30/34] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b8884dd1e4..1a356a37bd 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ litellm manages: - guarantees consistent output, text responses will always be available at `['choices'][0]['message']['content']` # usage - +Demo - https://litellm.ai/ \ Read the docs - https://litellm.readthedocs.io/en/latest/ ## quick start From bcb7e390555abdeaeafee52b36dc894ff00eec2f Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 9 Aug 2023 06:09:28 -0700 Subject: [PATCH 31/34] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1a356a37bd..ca0e9db714 100644 --- a/README.md +++ b/README.md @@ -63,11 +63,11 @@ for chunk in result: print(chunk['choices'][0]['delta']) ``` -# hosted version -- [Grab time if you want access 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) +# support / talk with founders +- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) +- [Community Discord 💭](https://discord.gg/wuPM9dRgDw) +- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬ +- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai # why did we build this - **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere - -# Support -Contact us at ishaan@berri.ai / krrish@berri.ai From 2aa53738730c319bd7c7641efdb0485d5bd180fa Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 9 Aug 2023 06:10:00 -0700 Subject: [PATCH 32/34] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ca0e9db714..ac76d795da 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Get Support / Join the community 👉 [![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) -a simple & light package to call OpenAI, Azure, Cohere, Anthropic API Endpoints +a simple & light package to call OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints litellm manages: - translating inputs to completion and embedding endpoints From 2b84abbe27aad13df202380a6bd3cd12207a39e0 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 9 Aug 2023 06:16:19 -0700 Subject: [PATCH 33/34] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ac76d795da..14b5ecd2ce 100644 --- a/README.md +++ b/README.md @@ -7,12 +7,12 @@ Get Support / Join the community 👉 [![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) -a simple & light package to call OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints +a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints litellm manages: -- translating inputs to completion and embedding endpoints -- guarantees 
consistent output, text responses will always be available at `['choices'][0]['message']['content']` - +- translating inputs to the provider's completion and embedding endpoints +- guarantees [consistent output](https://litellm.readthedocs.io/en/latest/output/), text responses will always be available at `['choices'][0]['message']['content']` +- exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance) # usage Demo - https://litellm.ai/ \ Read the docs - https://litellm.readthedocs.io/en/latest/ From 00dc207eeecf3aca060579bf8317cf912a1a8b58 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 9 Aug 2023 06:17:08 -0700 Subject: [PATCH 34/34] Update README.md --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 14b5ecd2ce..488c211b85 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,9 @@ ![Downloads](https://img.shields.io/pypi/dm/litellm) [![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm) -Get Support / Join the community 👉 [![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) +[![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) -a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints - -litellm manages: +a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages: - translating inputs to the provider's completion and embedding endpoints - guarantees [consistent output](https://litellm.readthedocs.io/en/latest/output/), text responses will always be available at `['choices'][0]['message']['content']` - exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance)
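
The exception-mapping bullet above pairs with the `from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError` line visible in the `litellm/__init__.py` diff earlier, which shows litellm exposing the OpenAI error classes in its own namespace. Below is a minimal caller-side sketch of what that contract enables, assuming those re-exports and the `completion()` call shape shown in the patches above; the specific except branches are illustrative, not an exhaustive list of mapped errors.

```python
# Hedged sketch: assumes litellm surfaces provider failures as the OpenAI error
# classes re-exported in litellm/__init__.py (see the diff above).
from litellm import completion
from openai.error import AuthenticationError, RateLimitError, OpenAIError

messages = [{"role": "user", "content": "Hey, how's it going"}]

try:
    # The same call shape works for "gpt-3.5-turbo", "claude-2", "command-nightly", etc.
    response = completion(model="claude-instant-1", messages=messages)
    print(response['choices'][0]['message']['content'])
except AuthenticationError:
    # Missing or invalid provider API key (e.g. ANTHROPIC_API_KEY)
    print("Authentication failed - check the provider API key")
except RateLimitError:
    # Provider-side rate limiting, surfaced through the OpenAI exception type
    print("Rate limited - retry with backoff")
except OpenAIError as e:
    # Catch-all for other mapped provider errors
    print(f"Provider error: {e}")
```

This mirrors how `litellm/tests/test_completion.py` calls `completion()`, just with the errors caught and handled instead of re-raised via `pytest.fail`.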