From 03a43e158a3b15452432a41bbc526581b461e881 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 10:37:41 -0700 Subject: [PATCH 01/34] add import manager - make package lighter --- litellm/integrations/helicone.py | 2 +- litellm/main.py | 17 ++++++++++++----- litellm/tests/test_completion.py | 18 ++++++++++-------- litellm/utils.py | 16 +++++++++++++++- pyproject.toml | 9 ++++----- requirements.txt | 5 +---- 6 files changed, 43 insertions(+), 24 deletions(-) diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py index 6b3d619659..9e74b246f9 100644 --- a/litellm/integrations/helicone.py +++ b/litellm/integrations/helicone.py @@ -2,7 +2,6 @@ # On success, logs events to Helicone import dotenv, os import requests -from anthropic import HUMAN_PROMPT, AI_PROMPT dotenv.load_dotenv() # Loading env variables using dotenv import traceback class HeliconeLogger: @@ -14,6 +13,7 @@ class HeliconeLogger: self.key = os.getenv('HELICONE_API_KEY') def claude_mapping(self, model, messages, response_obj): + from anthropic import HUMAN_PROMPT, AI_PROMPT prompt = f"{HUMAN_PROMPT}" for message in messages: if "role" in message: diff --git a/litellm/main.py b/litellm/main.py index 7803de2a96..ea4d43a638 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1,6 +1,5 @@ -import os, openai, cohere, replicate, sys +import os, openai, sys from typing import Any -from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT from functools import partial import dotenv, traceback, random, asyncio, time from copy import deepcopy @@ -13,7 +12,7 @@ from tenacity import ( stop_after_attempt, wait_random_exponential, ) # for exponential backoff -from litellm.utils import get_secret +from litellm.utils import get_secret, install_and_import ####### ENVIRONMENT VARIABLES ################### dotenv.load_dotenv() # Loading env variables using dotenv @@ -28,9 +27,7 @@ new_response = { } ] } -# TODO move this to utils.py # TODO add translations -# TODO see if this worked - model_name == krrish ####### COMPLETION ENDPOINTS ################ ############################################# async def acompletion(*args, **kwargs): @@ -68,6 +65,7 @@ def completion( openai.api_type = "azure" openai.api_base = litellm.api_base if litellm.api_base is not None else get_secret("AZURE_API_BASE") openai.api_version = litellm.api_version if litellm.api_version is not None else get_secret("AZURE_API_VERSION") + # set key if api_key: openai.api_key = api_key elif litellm.azure_key: @@ -92,6 +90,7 @@ def completion( ) elif model in litellm.open_ai_chat_completion_models: openai.api_type = "openai" + # note: if a user sets a custom base - we should ensure this works openai.api_base = litellm.api_base if litellm.api_base is not None else "https://api.openai.com/v1" openai.api_version = None if litellm.organization: @@ -155,6 +154,8 @@ def completion( model_response["usage"] = response["usage"] response = model_response elif "replicate" in model: + # import replicate/if it fails then pip install replicate + install_and_import("replicate") # replicate defaults to os.environ.get("REPLICATE_API_TOKEN") # checking in case user set it to REPLICATE_API_KEY instead if not get_secret("REPLICATE_API_TOKEN") and get_secret("REPLICATE_API_KEY"): @@ -194,6 +195,10 @@ def completion( } response = model_response elif model in litellm.anthropic_models: + # import anthropic/if it fails then pip install anthropic + install_and_import("anthropic") + from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT + #anthropic defaults to 
os.environ.get("ANTHROPIC_API_KEY") if api_key: os.environ["ANTHROPIC_API_KEY"] = api_key @@ -239,6 +244,8 @@ def completion( } response = model_response elif model in litellm.cohere_models: + # import cohere/if it fails then pip install cohere + install_and_import("cohere") if api_key: cohere_key = api_key elif litellm.cohere_key: diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index a4c151e5bc..925483f32f 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -7,8 +7,10 @@ sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the import pytest import litellm from litellm import embedding, completion +from infisical import InfisicalClient # litellm.set_verbose = True +litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"]) user_message = "Hello, whats the weather in San Francisco??" messages = [{ "content": user_message,"role": "user"}] @@ -16,6 +18,14 @@ messages = [{ "content": user_message,"role": "user"}] def logger_fn(user_model_dict): print(f"user_model_dict: {user_model_dict}") +def test_completion_claude(): + try: + response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) + # Add any assertions here to check the response + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_completion_openai(): try: response = completion(model="gpt-3.5-turbo", messages=messages) @@ -84,14 +94,6 @@ def test_completion_azure(): except Exception as e: pytest.fail(f"Error occurred: {e}") -def test_completion_claude(): - try: - response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) - # Add any assertions here to check the response - print(response) - except Exception as e: - pytest.fail(f"Error occurred: {e}") - def test_completion_cohere(): try: response = completion(model="command-nightly", messages=messages, max_tokens=500) diff --git a/litellm/utils.py b/litellm/utils.py index d32b946892..5b48201314 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4,7 +4,6 @@ import subprocess, os import litellm, openai import random, uuid, requests import datetime, time -from anthropic import Anthropic import tiktoken encoding = tiktoken.get_encoding("cl100k_base") from .integrations.helicone import HeliconeLogger @@ -34,6 +33,19 @@ def print_verbose(print_statement): if random.random() <= 0.3: print("Get help - https://discord.com/invite/wuPM9dRgDw") +####### Package Import Handler ################### +import importlib +import subprocess +def install_and_import(package): + try: + importlib.import_module(package) + except ImportError: + print(f"{package} is not installed. 
Installing...") + subprocess.call([sys.executable, '-m', 'pip', 'install', package]) + finally: + globals()[package] = importlib.import_module(package) +################################################## + ####### LOGGING ################### #Logging function -> log the exact model details + what's being sent | Non-Blocking def logging(model=None, input=None, azure=False, additional_args={}, logger_fn=None, exception=None): @@ -329,6 +341,8 @@ def prompt_token_calculator(model, messages): text = " ".join(message["content"] for message in messages) num_tokens = 0 if "claude" in model: + install_and_import('anthropic') + from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT anthropic = Anthropic() num_tokens = anthropic.count_tokens(text) else: diff --git a/pyproject.toml b/pyproject.toml index 9953d8fe4c..20916e7823 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,12 +8,11 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.8" -openai = {extras = ["datalib"], version = "^0.27.8"} -cohere = "^4.18.0" +openai = "^0.27.8" + pytest = "^7.4.0" -pydantic = "^2.1.1" -anthropic = "^0.3.7" -replicate = "^0.10.0" + + python-dotenv = "^1.0.0" tenacity = "^8.0.1" tiktoken = "^0.4.0" diff --git a/requirements.txt b/requirements.txt index 87aa1e7376..ba5e487e38 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,5 @@ -pydantic +# used by CI/CD testing openai -cohere -anthropic -replicate pytest python-dotenv openai[datalib] From 8e863120feb97214119184cf141c5bb24d9fbce9 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 10:46:20 -0700 Subject: [PATCH 02/34] add import for co, anth --- .DS_Store | Bin 6148 -> 6148 bytes litellm/main.py | 2 ++ litellm/tests/test_completion.py | 16 +++++++++------- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.DS_Store b/.DS_Store index 17cee6a0acfa4d370d7d9b919baa2fb3a72b4c2f..22073074832468582ce431f5bcd4c2f80be08229 100644 GIT binary patch delta 53 zcmZoMXfc@JFUrBdz`)4BAi&_6lb@WFlb;0S3v3qTSkBDIu{nT6m~rz`)+DBh4Q-p* IIsWnk075|xYybcN delta 182 zcmZoMXfc@JFUrNhz`)4BAi%(o!;s2Q!jQ?3&ycrSkYhQsD@aO+AsHx;%TRzU%@7Y% zn95KLB=vwYm7Y2I$w@i+NkH{La~ReGvEF|$U|`tH!y?RRF9=kg%a8;lQ-Cm&A&(&) Uzafm>D2B+erZR12=lIJH09xKHMgRZ+ diff --git a/litellm/main.py b/litellm/main.py index ea4d43a638..33f7676d39 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -156,6 +156,7 @@ def completion( elif "replicate" in model: # import replicate/if it fails then pip install replicate install_and_import("replicate") + import replicate # replicate defaults to os.environ.get("REPLICATE_API_TOKEN") # checking in case user set it to REPLICATE_API_KEY instead if not get_secret("REPLICATE_API_TOKEN") and get_secret("REPLICATE_API_KEY"): @@ -246,6 +247,7 @@ def completion( elif model in litellm.cohere_models: # import cohere/if it fails then pip install cohere install_and_import("cohere") + import cohere if api_key: cohere_key = api_key elif litellm.cohere_key: diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 925483f32f..52c9373cc6 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -26,6 +26,14 @@ def test_completion_claude(): except Exception as e: pytest.fail(f"Error occurred: {e}") +def test_completion_cohere(): + try: + response = completion(model="command-nightly", messages=messages, max_tokens=500) + # Add any assertions here to check the response + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_completion_openai(): try: response = 
completion(model="gpt-3.5-turbo", messages=messages) @@ -94,13 +102,7 @@ def test_completion_azure(): except Exception as e: pytest.fail(f"Error occurred: {e}") -def test_completion_cohere(): - try: - response = completion(model="command-nightly", messages=messages, max_tokens=500) - # Add any assertions here to check the response - print(response) - except Exception as e: - pytest.fail(f"Error occurred: {e}") + # Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect. # [TODO] improve our try-except block to handle for these From 3332cc2065ef9773e5a31f2c306f9b186c3983d1 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 10:58:43 -0700 Subject: [PATCH 03/34] remove deps datalib, pytest, tenacity, infisical --- .circleci/config.yml | 2 ++ litellm/__init__.py | 2 +- litellm/main.py | 5 ----- litellm/tests/test_embedding.py | 20 ++++++++++++++++++++ pyproject.toml | 4 ---- requirements.txt | 7 ++----- 6 files changed, 25 insertions(+), 15 deletions(-) create mode 100644 litellm/tests/test_embedding.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 20260abceb..edefc4b7e0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,6 +13,8 @@ jobs: command: | python -m pip install --upgrade pip python -m pip install -r requirements.txt + pip install infisical + pip install pytest # Run pytest and generate JUnit XML report - run: diff --git a/litellm/__init__.py b/litellm/__init__.py index 937b22c14b..aaa36737ef 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -4,7 +4,7 @@ failure_callback = [] set_verbose=False telemetry=True max_tokens = 256 # OpenAI Defaults -retry = True # control tenacity retries. +retry = True openai_key = None azure_key = None anthropic_key = None diff --git a/litellm/main.py b/litellm/main.py index 33f7676d39..299376458f 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -7,11 +7,6 @@ import litellm from litellm import client, logging, exception_type, timeout, get_optional_params import tiktoken encoding = tiktoken.get_encoding("cl100k_base") -from tenacity import ( - retry, - stop_after_attempt, - wait_random_exponential, -) # for exponential backoff from litellm.utils import get_secret, install_and_import ####### ENVIRONMENT VARIABLES ################### dotenv.load_dotenv() # Loading env variables using dotenv diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py new file mode 100644 index 0000000000..be2b30a81b --- /dev/null +++ b/litellm/tests/test_embedding.py @@ -0,0 +1,20 @@ + +import sys, os +import traceback +import pytest + +sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path +import litellm +from litellm import embedding, completion +from infisical import InfisicalClient + +# litellm.set_verbose = True +litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"]) + +def test_openai_embedding(): + try: + response = embedding(model='text-embedding-ada-002', input=["good morning from litellm"]) + # Add any assertions here to check the response + print(f"response: {str(response)}") + except Exception as e: + pytest.fail(f"Error occurred: {e}") diff --git a/pyproject.toml b/pyproject.toml index 20916e7823..d75e1762d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,12 +9,8 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.8" openai = "^0.27.8" - pytest = "^7.4.0" - - python-dotenv = "^1.0.0" -tenacity = "^8.0.1" tiktoken = 
"^0.4.0" [build-system] diff --git a/requirements.txt b/requirements.txt index ba5e487e38..56f796b35c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,5 @@ # used by CI/CD testing openai -pytest python-dotenv -openai[datalib] -tenacity -tiktoken -infisical \ No newline at end of file +openai +tiktoken \ No newline at end of file From 33547c761148b8e5cbf77b673d4b5909e17e4cbe Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 11:32:31 -0700 Subject: [PATCH 04/34] fix circle ci test --- .circleci/config.yml | 1 + litellm/tests/test_embedding.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index edefc4b7e0..397031de7d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -15,6 +15,7 @@ jobs: python -m pip install -r requirements.txt pip install infisical pip install pytest + pip install openai[datalib] # Run pytest and generate JUnit XML report - run: diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index be2b30a81b..ce83ffc70a 100644 --- a/litellm/tests/test_embedding.py +++ b/litellm/tests/test_embedding.py @@ -17,4 +17,4 @@ def test_openai_embedding(): # Add any assertions here to check the response print(f"response: {str(response)}") except Exception as e: - pytest.fail(f"Error occurred: {e}") + pytest.fail(f"Error occurred: {e}") \ No newline at end of file From 79af0ea052f6ae0ec0725eea1f567c7d499bd8fe Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 11:34:43 -0700 Subject: [PATCH 05/34] remove pytest as a package dep --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d75e1762d5..92434afdf3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,6 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.8" openai = "^0.27.8" -pytest = "^7.4.0" python-dotenv = "^1.0.0" tiktoken = "^0.4.0" From d72dc244b134da3cf5df1f2e6dc9486ce15e5427 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 11:35:20 -0700 Subject: [PATCH 06/34] new version toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 92434afdf3..f53cd4d49a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.356" +version = "0.1.360" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From 5c17f90173bab07c4591b5cf83fab9f46308bf7b Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 13:35:37 -0700 Subject: [PATCH 07/34] add hf support --- litellm/__init__.py | 2 ++ litellm/main.py | 29 ++++++++++++++++++++++++++++- litellm/tests/test_completion.py | 10 ++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index aaa36737ef..01559e3d18 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -10,6 +10,8 @@ azure_key = None anthropic_key = None replicate_key = None cohere_key = None + +hugging_api_token = None ####### THREAD-SPECIFIC DATA ################### class MyLocal(threading.local): def __init__(self): diff --git a/litellm/main.py b/litellm/main.py index 299376458f..4fc3bcb05e 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -44,7 +44,8 @@ def completion( temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'), presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None, # Optional liteLLM 
function params - *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False + *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False, + hugging_face = False ): try: global new_response @@ -273,6 +274,32 @@ def completion( "total_tokens": prompt_tokens + completion_tokens } response = model_response + elif hugging_face == True: + import requests + API_URL = f"https://api-inference.huggingface.co/models/{model}" + HF_TOKEN = get_secret("HF_TOKEN") + headers = {"Authorization": f"Bearer {HF_TOKEN}"} + + prompt = " ".join([message["content"] for message in messages]) + ## LOGGING + logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + input_payload = {"inputs": prompt} + response = requests.post(API_URL, headers=headers, json=input_payload) + + completion_response = response.json()[0]['generated_text'] + ## LOGGING + logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) + prompt_tokens = len(encoding.encode(prompt)) + completion_tokens = len(encoding.encode(completion_response)) + ## RESPONSE OBJECT + model_response["choices"][0]["message"]["content"] = completion_response + model_response["created"] = time.time() + model_response["model"] = model + model_response["usage"] = { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens + } else: ## LOGGING logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 52c9373cc6..e001daa615 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -26,6 +26,16 @@ def test_completion_claude(): except Exception as e: pytest.fail(f"Error occurred: {e}") +def test_completion_hf_api(): + try: + user_message = "write some code to find the sum of two numbers" + messages = [{ "content": user_message,"role": "user"}] + response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True) + # Add any assertions here to check the response + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_completion_cohere(): try: response = completion(model="command-nightly", messages=messages, max_tokens=500) From 2a216fbb3edc1351603572aee9f8e604afb5503c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 8 Aug 2023 13:37:32 -0700 Subject: [PATCH 08/34] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ddbe4ecbf..bcb0608f6d 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/) [![CircleCI](https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main) ![Downloads](https://img.shields.io/pypi/dm/litellm) -[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere-blue?color=green)](https://github.com/BerriAI/litellm) 
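The Hugging Face branch added in [PATCH 07/34] above reduces to a single Inference API request. A minimal standalone sketch of that flow, assuming the `HF_TOKEN` environment variable and the `google/flan-t5-xxl` repo used in the docs and notebook (any hosted text-generation model can be substituted):

```python
import os
import requests

# Sketch of the request the hugging_face=True branch performs.
# Model repo and HF_TOKEN env var mirror the docs/tests above; both are
# illustrative choices, not the only supported values.
API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-xxl"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

messages = [{"role": "user", "content": "Hello, whats the weather in San Francisco??"}]
prompt = " ".join(m["content"] for m in messages)  # completion() flattens messages the same way

resp = requests.post(API_URL, headers=headers, json={"inputs": prompt})
resp.raise_for_status()
print(resp.json()[0]["generated_text"])  # first generation returned by the Inference API
```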
+[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm) Get Support / Join the community 👉 [![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) From 03a330517d849dba0976a5339b729658e0037c43 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 13:45:47 -0700 Subject: [PATCH 09/34] new version toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f53cd4d49a..d260539972 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.360" +version = "0.1.361" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From b4bc5edde06696c974c00c3b58fbf4380ff10d18 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 13:55:27 -0700 Subject: [PATCH 10/34] add docs --- docs/supported.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/supported.md b/docs/supported.md index 692a55e7dc..0eb2ec3d15 100644 --- a/docs/supported.md +++ b/docs/supported.md @@ -36,3 +36,16 @@ | claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` | | claude-v2 | `completion('claude-v2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | +### Hugging Face Inference API +All `text2text-generation`, `text-generation` are supported by liteLLM +In order to use models on hugging face inference: +* copy the `model repo` from hugging face and set it as the `model` parameter in the completion call. +* set `hugging_face` to `True` + +| Model Name | Function Call | Required OS Variables | +|------------------|--------------------------------------------|--------------------------------------| +| stabilityai/stablecode-completion-alpha-3b-4k | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True) | `os.environ['HF_TOKEN']` | +| google/flan-t5-xxl | `completion(model="google/flan-t5-xxl", messages=messages, hugging_face=True) | `os.environ['HF_TOKEN']` | + + + From a94dc9369f2727c4433b9043aed8da7873ba262f Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 13:58:58 -0700 Subject: [PATCH 11/34] fix model response --- litellm/main.py | 1 + pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/main.py b/litellm/main.py index 4fc3bcb05e..8f7873099c 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -300,6 +300,7 @@ def completion( "completion_tokens": completion_tokens, "total_tokens": prompt_tokens + completion_tokens } + response = model_response else: ## LOGGING logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) diff --git a/pyproject.toml b/pyproject.toml index d260539972..20bc61f467 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.361" +version = "0.1.362" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From 318c0cbada3c7e096283c22b670c30630d98fa1e Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 14:21:11 -0700 Subject: [PATCH 12/34] add example on using HF models --- .../community-resources}/max_tokens.json | 0 cookbook/liteLLM_Hugging_Face_Example.ipynb | 153 ++++++++++++++++++ 2 files changed, 153 insertions(+) rename {community_resources 
=> cookbook/community-resources}/max_tokens.json (100%) create mode 100644 cookbook/liteLLM_Hugging_Face_Example.ipynb diff --git a/community_resources/max_tokens.json b/cookbook/community-resources/max_tokens.json similarity index 100% rename from community_resources/max_tokens.json rename to cookbook/community-resources/max_tokens.json diff --git a/cookbook/liteLLM_Hugging_Face_Example.ipynb b/cookbook/liteLLM_Hugging_Face_Example.ipynb new file mode 100644 index 0000000000..9c64e2df25 --- /dev/null +++ b/cookbook/liteLLM_Hugging_Face_Example.ipynb @@ -0,0 +1,153 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## Install liteLLM https://github.com/BerriAI/litellm\n", + "liteLLM provides one interface to call gpt 3.5, hugging face inference endpoints" + ], + "metadata": { + "id": "IGQZtR61AZSd" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "x_4jcmmXcdm-", + "outputId": "c89e7817-561d-4867-904b-aa1634565cbb" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: litellm==0.1.362 in /usr/local/lib/python3.10/dist-packages (0.1.362)\n", + "Requirement already satisfied: openai<0.28.0,>=0.27.8 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.362) (0.27.8)\n", + "Requirement already satisfied: python-dotenv<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.362) (1.0.0)\n", + "Requirement already satisfied: tiktoken<0.5.0,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.362) (0.4.0)\n", + "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.362) (2.28.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.362) (4.65.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.362) (3.8.5)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<0.5.0,>=0.4.0->litellm==0.1.362) (2022.10.31)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.362) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.362) (3.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.362) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.362) (2023.7.22)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (4.0.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.362) (1.3.1)\n" + ] + } + ], + "source": [ + "!pip install litellm==\"0.1.362\"" + ] + }, + { + "cell_type": "code", + "source": [ + "from litellm import completion\n", + "import os\n", + "user_message = \"Hello, whats the weather in San Francisco??\"\n", + "messages = [{ \"content\": user_message,\"role\": \"user\"}]\n", + "\n", + "os.environ['HF_TOKEN'] = \"\"#@param\n", + "# get your hugging face token from here:\n", + "# https://huggingface.co/settings/tokens\n", + "\n", + "# Optional if you want to run OpenAI TOO\n", + "os.environ['OPENAI_API_KEY'] = \"\" #@param\n", + "\n", + "response = completion(\"stabilityai/stablecode-completion-alpha-3b-4k\", messages=messages, hugging_face=True)\n", + "print(\"Response from stabilityai/stablecode-completion-alpha-3b-4k\")\n", + "print(response['choices'][0]['message']['content'])\n", + "print(\"\\n\\n\")\n", + "\n", + "response = completion(\"bigcode/starcoder\", messages=messages, hugging_face=True)\n", + "print(\"Response from bigcode/starcoder\")\n", + "print(response['choices'][0]['message']['content'])\n", + "print(\"\\n\\n\")\n", + "\n", + "response = completion(\"google/flan-t5-xxl\", messages=messages, hugging_face=True)\n", + "print(\"Response from google/flan-t5-xxl\")\n", + "print(response['choices'][0]['message']['content'])\n", + "print(\"\\n\\n\")\n", + "\n", + "response = completion(\"google/flan-t5-large\", messages=messages, hugging_face=True)\n", + "print(\"Response from google/flan-t5-large\")\n", + "print(response['choices'][0]['message']['content'])\n", + "print(\"\\n\\n\")\n", + "\n", + "response = completion(model=\"gpt-3.5-turbo\", messages=messages)\n", + "print(response['choices'][0]['message']['content'])\n", + "print(response)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vC54VW3jvLnN", + "outputId": "e6616221-12c9-4313-dd03-fd94fa095e8e" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Response from stabilityai/stablecode-completion-alpha-3b-4k\n", + "Hello, whats the weather in San Francisco??\",\n", + " \"id\": 1,\n", + " \"\n", + "\n", + "\n", + "\n", + "Response from bigcode/starcoder\n", + "Hello, whats the weather in San Francisco??\")\n", + "\n", + "# print(response)\n", + "\n", + "# print(response.text)\n", + "\n", + "#\n", + "\n", + "\n", + "\n", + "Response from google/flan-t5-xxl\n", + "a little cold\n", + "\n", + "\n", + "\n", + "Response from google/flan-t5-large\n", + "cool\n", + "\n", + "\n", + "\n", + "I'm sorry, but I am an AI language model and do not have real-time data. 
However, you can check the weather in San Francisco by searching for \"San Francisco weather\" on a search engine or checking a reliable weather website or app.\n" + ] + } + ] + } + ] +} \ No newline at end of file From 225efba211898fd404ea944ae207e944344ac689 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 14:29:07 -0700 Subject: [PATCH 13/34] update docs --- docs/supported.md | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/docs/supported.md b/docs/supported.md index 0eb2ec3d15..7d3cff87e3 100644 --- a/docs/supported.md +++ b/docs/supported.md @@ -37,15 +37,22 @@ | claude-v2 | `completion('claude-v2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | ### Hugging Face Inference API -All `text2text-generation`, `text-generation` are supported by liteLLM -In order to use models on hugging face inference: -* copy the `model repo` from hugging face and set it as the `model` parameter in the completion call. -* set `hugging_face` to `True` -| Model Name | Function Call | Required OS Variables | -|------------------|--------------------------------------------|--------------------------------------| -| stabilityai/stablecode-completion-alpha-3b-4k | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True) | `os.environ['HF_TOKEN']` | -| google/flan-t5-xxl | `completion(model="google/flan-t5-xxl", messages=messages, hugging_face=True) | `os.environ['HF_TOKEN']` | +All [`text2text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text2text-generation&sort=downloads) and [`text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text-generation&sort=downloads) models are supported by liteLLM. You can use any text model from Hugging Face with the following steps: +1. Copy the `model repo` URL from Hugging Face and set it as the `model` parameter in the completion call. +2. Set `hugging_face` parameter to `True`. +3. 
Make sure to set the hugging face API key + +Here are some examples of supported models: +**Note that the models mentioned in the table are examples, and you can use any text model available on Hugging Face by following the steps above.** + +| Model Name | Function Call | Required OS Variables | +|------------------|-------------------------------------------------------------------------------------|--------------------------------------| +| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | +| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | +| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | +| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | + From 6c72b0a12a624f807717fd614228445feef860ee Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 14:44:37 -0700 Subject: [PATCH 14/34] udpate docs --- docs/supported.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/supported.md b/docs/supported.md index 7d3cff87e3..78fbd09507 100644 --- a/docs/supported.md +++ b/docs/supported.md @@ -39,9 +39,10 @@ ### Hugging Face Inference API All [`text2text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text2text-generation&sort=downloads) and [`text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text-generation&sort=downloads) models are supported by liteLLM. You can use any text model from Hugging Face with the following steps: -1. Copy the `model repo` URL from Hugging Face and set it as the `model` parameter in the completion call. -2. Set `hugging_face` parameter to `True`. -3. Make sure to set the hugging face API key + +* Copy the `model repo` URL from Hugging Face and set it as the `model` parameter in the completion call. +* Set `hugging_face` parameter to `True`. +* Make sure to set the hugging face API key Here are some examples of supported models: **Note that the models mentioned in the table are examples, and you can use any text model available on Hugging Face by following the steps above.** From 1292038148cf9c20bda96895b0c57efac5a77d1f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 8 Aug 2023 14:49:42 -0700 Subject: [PATCH 15/34] Update README.md --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bcb0608f6d..8df5f3dde3 100644 --- a/README.md +++ b/README.md @@ -25,11 +25,6 @@ pip install litellm ```python from litellm import completion -## set ENV variables -# ENV variables can be set in .env file, too. 
Example in .env.example -os.environ["OPENAI_API_KEY"] = "openai key" -os.environ["COHERE_API_KEY"] = "cohere key" - messages = [{ "content": "Hello, how are you?","role": "user"}] # openai call @@ -41,6 +36,9 @@ response = completion("command-nightly", messages) # azure openai call response = completion("chatgpt-test", messages, azure=True) +# hugging face call +response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True) + # openrouter call response = completion("google/palm-2-codechat-bison", messages) ``` From e4f96075c3254ca5657f91c1ecf9ab08d7f806f1 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 15:18:48 -0700 Subject: [PATCH 16/34] fix docs claude2 --- docs/supported.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/supported.md b/docs/supported.md index 78fbd09507..713e7313fe 100644 --- a/docs/supported.md +++ b/docs/supported.md @@ -34,7 +34,7 @@ | Model Name | Function Call | Required OS Variables | |------------------|--------------------------------------------|--------------------------------------| | claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` | -| claude-v2 | `completion('claude-v2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | +| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | ### Hugging Face Inference API From 36a6ac9b08fab73f475f198cfa8698ac3f4ceb6a Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 15:57:24 -0700 Subject: [PATCH 17/34] streaming for anthropic --- litellm/main.py | 24 +++++++++++- litellm/tests/test_completion.py | 13 +++++++ litellm/utils.py | 67 +++++++++++++++++++------------- 3 files changed, 75 insertions(+), 29 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 8f7873099c..8d8c78e256 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -11,6 +11,19 @@ from litellm.utils import get_secret, install_and_import ####### ENVIRONMENT VARIABLES ################### dotenv.load_dotenv() # Loading env variables using dotenv +# TODO this will evolve to accepting models +# replicate/anthropic/cohere +class CustomStreamWrapper: + def __init__(self, completion_stream): + self.completion_stream = completion_stream + + def __iter__(self): + return self + + def __next__(self): + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk.completion}]} + new_response = { "choices": [ { @@ -54,7 +67,8 @@ def completion( optional_params = get_optional_params( functions=functions, function_call=function_call, temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens, - presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id + presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id, + model=model ) if azure == True: # azure configs @@ -222,8 +236,14 @@ def completion( completion = anthropic.completions.create( model=model, prompt=prompt, - max_tokens_to_sample=max_tokens_to_sample + max_tokens_to_sample=max_tokens_to_sample, + **optional_params ) + if optional_params['stream'] == True: + # don't try to access stream object, + response = CustomStreamWrapper(completion) + return response + completion_response = completion.completion ## LOGGING logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": 
completion_response}, logger_fn=logger_fn) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index e001daa615..35f7b631d7 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -26,6 +26,19 @@ def test_completion_claude(): except Exception as e: pytest.fail(f"Error occurred: {e}") +def test_completion_claude_stream(): + try: + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "how does a court case get to the Supreme Court?"} + ] + response = completion(model="claude-2", messages=messages, stream=True) + # Add any assertions here to check the response + for chunk in response: + print(chunk['choices'][0]['delta']) # same as openai format + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_completion_hf_api(): try: user_message = "write some code to find the sum of two numbers" diff --git a/litellm/utils.py b/litellm/utils.py index 5b48201314..599c61e246 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -146,36 +146,49 @@ def get_optional_params( frequency_penalty = 0, logit_bias = {}, user = "", - deployment_id = None + deployment_id = None, + model = None, ): optional_params = {} - if functions != []: - optional_params["functions"] = functions - if function_call != "": - optional_params["function_call"] = function_call - if temperature != 1: - optional_params["temperature"] = temperature - if top_p != 1: - optional_params["top_p"] = top_p - if n != 1: - optional_params["n"] = n - if stream: + if model in litellm.anthropic_models: + # handle anthropic params + if stream: optional_params["stream"] = stream - if stop != None: - optional_params["stop"] = stop - if max_tokens != float('inf'): - optional_params["max_tokens"] = max_tokens - if presence_penalty != 0: - optional_params["presence_penalty"] = presence_penalty - if frequency_penalty != 0: - optional_params["frequency_penalty"] = frequency_penalty - if logit_bias != {}: - optional_params["logit_bias"] = logit_bias - if user != "": - optional_params["user"] = user - if deployment_id != None: - optional_params["deployment_id"] = deployment_id - return optional_params + if stop != None: + optional_params["stop_sequences"] = stop + if temperature != 1: + optional_params["temperature"] = temperature + if top_p != 1: + optional_params["top_p"] = top_p + return optional_params + else:# assume passing in params for openai/azure openai + if functions != []: + optional_params["functions"] = functions + if function_call != "": + optional_params["function_call"] = function_call + if temperature != 1: + optional_params["temperature"] = temperature + if top_p != 1: + optional_params["top_p"] = top_p + if n != 1: + optional_params["n"] = n + if stream: + optional_params["stream"] = stream + if stop != None: + optional_params["stop"] = stop + if max_tokens != float('inf'): + optional_params["max_tokens"] = max_tokens + if presence_penalty != 0: + optional_params["presence_penalty"] = presence_penalty + if frequency_penalty != 0: + optional_params["frequency_penalty"] = frequency_penalty + if logit_bias != {}: + optional_params["logit_bias"] = logit_bias + if user != "": + optional_params["user"] = user + if deployment_id != None: + optional_params["deployment_id"] = deployment_id + return optional_params def set_callbacks(callback_list): global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient From 
5040d08f79edaf1cfbeac1e92d26ed3cdaa5918b Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 16:07:53 -0700 Subject: [PATCH 18/34] fix anthropic streaming --- litellm/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/main.py b/litellm/main.py index 8d8c78e256..0eac877247 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -239,7 +239,7 @@ def completion( max_tokens_to_sample=max_tokens_to_sample, **optional_params ) - if optional_params['stream'] == True: + if 'stream' in optional_params and optional_params['stream'] == True: # don't try to access stream object, response = CustomStreamWrapper(completion) return response From 276dae803e3ebcc176b9869c73b7c7cf22978ed5 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 16:10:52 -0700 Subject: [PATCH 19/34] anthropic streaming --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 20bc61f467..8515d1e8c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.362" +version = "0.1.363" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From d6f8cfa3d8895bf1ab60dd6577ac82f2c51e1e79 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 8 Aug 2023 16:19:34 -0700 Subject: [PATCH 20/34] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 8df5f3dde3..5c671dd056 100644 --- a/README.md +++ b/README.md @@ -51,10 +51,15 @@ pip install litellm==0.1.345 ## Streaming Queries liteLLM supports streaming the model response back, pass `stream=True` to get a streaming iterator in response. +Streaming is supported for OpenAI, Azure, Anthropic models ```python response = completion(model="gpt-3.5-turbo", messages=messages, stream=True) for chunk in response: print(chunk['choices'][0]['delta']) +# claude 2 +result = litellm.completion('claude-2', messages, stream=True) +for chunk in result: + print(chunk['choices'][0]['delta']) ``` # hosted version From 654e8480d38156d93a8dd03f5976d2a10c7d33ac Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 16:20:49 -0700 Subject: [PATCH 21/34] example with Claude+streaming --- ...pic)_with_Streaming_liteLLM_Examples.ipynb | 406 ++++++++++++++++++ 1 file changed, 406 insertions(+) create mode 100644 cookbook/Claude_(Anthropic)_with_Streaming_liteLLM_Examples.ipynb diff --git a/cookbook/Claude_(Anthropic)_with_Streaming_liteLLM_Examples.ipynb b/cookbook/Claude_(Anthropic)_with_Streaming_liteLLM_Examples.ipynb new file mode 100644 index 0000000000..f3875ae608 --- /dev/null +++ b/cookbook/Claude_(Anthropic)_with_Streaming_liteLLM_Examples.ipynb @@ -0,0 +1,406 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZwuaylskLxFu", + "outputId": "d684d6a3-32fe-4beb-c378-c39134bcf8cc" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting litellm==0.1.363\n", + " Downloading litellm-0.1.363-py3-none-any.whl (34 kB)\n", + "Requirement already satisfied: openai<0.28.0,>=0.27.8 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (0.27.8)\n", + "Requirement 
already satisfied: python-dotenv<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (1.0.0)\n", + "Requirement already satisfied: tiktoken<0.5.0,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (0.4.0)\n", + "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (2.31.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (4.65.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.8.5)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<0.5.0,>=0.4.0->litellm==0.1.363) (2022.10.31)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (2023.7.22)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (4.0.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.3.1)\n", + "Installing collected packages: litellm\n", + " Attempting uninstall: litellm\n", + " Found existing installation: litellm 0.1.362\n", + " Uninstalling litellm-0.1.362:\n", + " Successfully uninstalled litellm-0.1.362\n", + "Successfully installed litellm-0.1.363\n" + ] + } + ], + "source": [ + "!pip install litellm==\"0.1.363\"" + ] + }, + { + "cell_type": "code", + "source": [ + "# @title Import litellm & Set env variables\n", + "import litellm\n", + "import os\n", + "\n", + "os.environ[\"ANTHROPIC_API_KEY\"] = \" \" #@param" + ], + "metadata": { + "id": "W216G__XL19Q" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Request Claude Instant-1 and Claude-2\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"}\n", + " ]\n", + "\n", + "result = litellm.completion('claude-instant-1', messages)\n", + "print(\"\\n\\n 
Result from claude-instant-1\", result)\n", + "result = litellm.completion('claude-2', messages, max_tokens=5, temperature=0.2)\n", + "print(\"\\n\\n Result from claude-2\", result)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ff1lKwUMMLJj", + "outputId": "bfddf6f8-36d4-45e5-92dc-349083fa41b8" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + " Result from claude-instant-1 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \" The Los Angeles Dodgers won the 2020 World Series, defeating the Tampa Bay Rays 4-2. It was the Dodgers' first World Series title since 1988.\"}}], 'created': 1691536677.2676156, 'model': 'claude-instant-1', 'usage': {'prompt_tokens': 30, 'completion_tokens': 32, 'total_tokens': 62}}\n", + "\n", + "\n", + " Result from claude-2 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': ' The Los Angeles Dodgers won'}}], 'created': 1691536677.944753, 'model': 'claude-2', 'usage': {'prompt_tokens': 30, 'completion_tokens': 5, 'total_tokens': 35}}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title Streaming Example: Request Claude-2\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\"role\": \"user\", \"content\": \"how does a court case get to the Supreme Court?\"}\n", + " ]\n", + "\n", + "result = litellm.completion('claude-2', messages, stream=True)\n", + "for chunk in result:\n", + " print(chunk['choices'][0]['delta'])\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "06hWKnNQMrV-", + "outputId": "7fdec0eb-d4a9-4882-f9c4-987ff9a31114" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Here\n", + "'s\n", + " a\n", + " quick\n", + " overview\n", + " of\n", + " how\n", + " a\n", + " court\n", + " case\n", + " can\n", + " reach\n", + " the\n", + " U\n", + ".\n", + "S\n", + ".\n", + " Supreme\n", + " Court\n", + ":\n", + "\n", + "\n", + "-\n", + " The\n", + " case\n", + " must\n", + " first\n", + " be\n", + " heard\n", + " in\n", + " a\n", + " lower\n", + " trial\n", + " court\n", + " (\n", + "either\n", + " a\n", + " state\n", + " court\n", + " or\n", + " federal\n", + " district\n", + " court\n", + ").\n", + " The\n", + " trial\n", + " court\n", + " makes\n", + " initial\n", + " r\n", + "ulings\n", + " and\n", + " produces\n", + " a\n", + " record\n", + " of\n", + " the\n", + " case\n", + ".\n", + "\n", + "\n", + "-\n", + " The\n", + " losing\n", + " party\n", + " can\n", + " appeal\n", + " the\n", + " decision\n", + " to\n", + " an\n", + " appeals\n", + " court\n", + " (\n", + "a\n", + " state\n", + " appeals\n", + " court\n", + " for\n", + " state\n", + " cases\n", + ",\n", + " or\n", + " a\n", + " federal\n", + " circuit\n", + " court\n", + " for\n", + " federal\n", + " cases\n", + ").\n", + " The\n", + " appeals\n", + " court\n", + " reviews\n", + " the\n", + " trial\n", + " court\n", + "'s\n", + " r\n", + "ulings\n", + " and\n", + " can\n", + " affirm\n", + ",\n", + " reverse\n", + ",\n", + " or\n", + " modify\n", + " the\n", + " decision\n", + ".\n", + "\n", + "\n", + "-\n", + " If\n", + " a\n", + " party\n", + " is\n", + " still\n", + " unsat\n", + "isf\n", + "ied\n", + " after\n", + " the\n", + " appeals\n", + " court\n", + " rules\n", + ",\n", + " they\n", + " can\n", + " 
petition\n", + " the\n", + " Supreme\n", + " Court\n", + " to\n", + " hear\n", + " the\n", + " case\n", + " through\n", + " a\n", + " writ\n", + " of\n", + " cert\n", + "ior\n", + "ari\n", + ".\n", + " \n", + "\n", + "\n", + "-\n", + " The\n", + " Supreme\n", + " Court\n", + " gets\n", + " thousands\n", + " of\n", + " cert\n", + " petitions\n", + " every\n", + " year\n", + " but\n", + " usually\n", + " only\n", + " agrees\n", + " to\n", + " hear\n", + " about\n", + " 100\n", + "-\n", + "150\n", + " of\n", + " cases\n", + " that\n", + " have\n", + " significant\n", + " national\n", + " importance\n", + " or\n", + " where\n", + " lower\n", + " courts\n", + " disagree\n", + " on\n", + " federal\n", + " law\n", + ".\n", + " \n", + "\n", + "\n", + "-\n", + " If\n", + " 4\n", + " out\n", + " of\n", + " the\n", + " 9\n", + " Just\n", + "ices\n", + " vote\n", + " to\n", + " grant\n", + " cert\n", + " (\n", + "agree\n", + " to\n", + " hear\n", + " the\n", + " case\n", + "),\n", + " it\n", + " goes\n", + " on\n", + " the\n", + " Supreme\n", + " Court\n", + "'s\n", + " do\n", + "cket\n", + " for\n", + " arguments\n", + ".\n", + "\n", + "\n", + "-\n", + " The\n", + " Supreme\n", + " Court\n", + " then\n", + " hears\n", + " oral\n", + " arguments\n", + ",\n", + " considers\n", + " written\n", + " brief\n", + "s\n", + ",\n", + " examines\n", + " the\n", + " lower\n", + " court\n", + " records\n", + ",\n", + " and\n", + " issues\n", + " a\n", + " final\n", + " ruling\n", + " on\n", + " the\n", + " case\n", + ",\n", + " which\n", + " serves\n", + " as\n", + " binding\n", + " precedent\n" + ] + } + ] + } + ] +} \ No newline at end of file From 72911a6bd01b0a978b70bedd51e6f60883a4babe Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 8 Aug 2023 16:22:18 -0700 Subject: [PATCH 22/34] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5c671dd056..36fc85b654 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ Streaming is supported for OpenAI, Azure, Anthropic models response = completion(model="gpt-3.5-turbo", messages=messages, stream=True) for chunk in response: print(chunk['choices'][0]['delta']) + # claude 2 result = litellm.completion('claude-2', messages, stream=True) for chunk in result: From ac460dd616c7c9e10ffb248eaf92be00dc2246de Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 8 Aug 2023 16:23:59 -0700 Subject: [PATCH 23/34] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 36fc85b654..b8884dd1e4 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ for chunk in response: print(chunk['choices'][0]['delta']) # claude 2 -result = litellm.completion('claude-2', messages, stream=True) +result = completion('claude-2', messages, stream=True) for chunk in result: print(chunk['choices'][0]['delta']) ``` From 613df8942f42d43503bcf45f3e3b73804c018b8a Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 17:01:58 -0700 Subject: [PATCH 24/34] add cohere streaming --- litellm/main.py | 27 +++++++++++++++++++++------ litellm/tests/test_completion.py | 14 ++++++++++++++ litellm/utils.py | 10 ++++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 0eac877247..17144a47f0 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -14,15 +14,24 @@ dotenv.load_dotenv() # Loading env variables using dotenv # TODO this will evolve to accepting models # replicate/anthropic/cohere class CustomStreamWrapper: - def 
__init__(self, completion_stream): - self.completion_stream = completion_stream + def __init__(self, completion_stream, model): + self.model = model + if model in litellm.cohere_models: + # cohere does not return an iterator, so we need to wrap it in one + self.completion_stream = iter(completion_stream) + else: + self.completion_stream = completion_stream def __iter__(self): return self def __next__(self): - chunk = next(self.completion_stream) - return {"choices": [{"delta": chunk.completion}]} + if self.model in litellm.anthropic_models: + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk.completion}]} + elif self.model in litellm.cohere_models: + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk.text}]} new_response = { "choices": [ @@ -241,7 +250,7 @@ def completion( ) if 'stream' in optional_params and optional_params['stream'] == True: # don't try to access stream object, - response = CustomStreamWrapper(completion) + response = CustomStreamWrapper(completion, model) return response completion_response = completion.completion @@ -277,8 +286,14 @@ def completion( ## COMPLETION CALL response = co.generate( model=model, - prompt = prompt + prompt = prompt, + **optional_params ) + if 'stream' in optional_params and optional_params['stream'] == True: + # don't try to access stream object, + response = CustomStreamWrapper(response, model) + return response + completion_response = response[0].text ## LOGGING logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 35f7b631d7..d5733e2fb4 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -57,6 +57,20 @@ def test_completion_cohere(): except Exception as e: pytest.fail(f"Error occurred: {e}") + +def test_completion_cohere_stream(): + try: + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "how does a court case get to the Supreme Court?"} + ] + response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50) + # Add any assertions here to check the response + for chunk in response: + print(chunk['choices'][0]['delta']) # same as openai format + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_completion_openai(): try: response = completion(model="gpt-3.5-turbo", messages=messages) diff --git a/litellm/utils.py b/litellm/utils.py index 599c61e246..04e92737a5 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -161,6 +161,16 @@ def get_optional_params( if top_p != 1: optional_params["top_p"] = top_p return optional_params + elif model in litellm.cohere_models: + # handle cohere params + if stream: + optional_params["stream"] = stream + if temperature != 1: + optional_params["temperature"] = temperature + if max_tokens != float('inf'): + optional_params["max_tokens"] = max_tokens + return optional_params + else:# assume passing in params for openai/azure openai if functions != []: optional_params["functions"] = functions From d87ae075747cdf54aaf97823aa295e9ec7d8a465 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 17:02:34 -0700 Subject: [PATCH 25/34] with cohere streaming --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8515d1e8c8..c8de403b30 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.363" +version = "0.1.364" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From f4048886abe867583e1c65c501a610b45fdc6acb Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 17:50:36 -0700 Subject: [PATCH 26/34] streaming replicate tests --- litellm/main.py | 37 +++++++----------------- litellm/tests/test_completion.py | 48 +++++++++++++++++++++----------- litellm/utils.py | 37 +++++++++++++++++++++++- 3 files changed, 78 insertions(+), 44 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 17144a47f0..b4a70709bf 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -7,32 +7,9 @@ import litellm from litellm import client, logging, exception_type, timeout, get_optional_params import tiktoken encoding = tiktoken.get_encoding("cl100k_base") -from litellm.utils import get_secret, install_and_import +from litellm.utils import get_secret, install_and_import, CustomStreamWrapper ####### ENVIRONMENT VARIABLES ################### dotenv.load_dotenv() # Loading env variables using dotenv - -# TODO this will evolve to accepting models -# replicate/anthropic/cohere -class CustomStreamWrapper: - def __init__(self, completion_stream, model): - self.model = model - if model in litellm.cohere_models: - # cohere does not return an iterator, so we need to wrap it in one - self.completion_stream = iter(completion_stream) - else: - self.completion_stream = completion_stream - - def __iter__(self): - return self - - def __next__(self): - if self.model in litellm.anthropic_models: - chunk = next(self.completion_stream) - return {"choices": [{"delta": chunk.completion}]} - elif self.model in litellm.cohere_models: - chunk = next(self.completion_stream) - return {"choices": [{"delta": chunk.text}]} - new_response = { "choices": [ { @@ -67,7 +44,7 @@ def completion( presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None, # Optional liteLLM function params *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False, - hugging_face = False + hugging_face = False, replicate=False, ): try: global new_response @@ -77,7 +54,8 @@ def completion( functions=functions, function_call=function_call, temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens, presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id, - model=model + # params to identify the model + model=model, replicate=replicate, hugging_face=hugging_face ) if azure == True: # azure configs @@ -172,7 +150,7 @@ def completion( model_response["model"] = model model_response["usage"] = response["usage"] response = model_response - elif "replicate" in model: + elif "replicate" in model or replicate == True: # import replicate/if it fails then pip install replicate install_and_import("replicate") import replicate @@ -196,6 +174,11 @@ def completion( output = replicate.run( model, input=input) + if 'stream' in optional_params and optional_params['stream'] == True: + # don't try to access stream object, + # let the stream handler know this is replicate + response = CustomStreamWrapper(output, "replicate") + return response response = "" for item in output: response += item diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index d5733e2fb4..304eb0303e 100644 --- a/litellm/tests/test_completion.py 
+++ b/litellm/tests/test_completion.py @@ -139,20 +139,36 @@ def test_completion_azure(): except Exception as e: pytest.fail(f"Error occurred: {e}") - - # Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect. -# [TODO] improve our try-except block to handle for these -# def test_completion_replicate_llama(): -# model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" -# try: -# response = completion(model=model_name, messages=messages, max_tokens=500) -# # Add any assertions here to check the response -# print(response) -# except Exception as e: -# print(f"in replicate llama, got error {e}") -# pass -# if e == "FunctionTimedOut": -# pass -# else: -# pytest.fail(f"Error occurred: {e}") +def test_completion_replicate_llama_stream(): + model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" + try: + response = completion(model=model_name, messages=messages, stream=True) + # Add any assertions here to check the response + for result in response: + print(result) + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +def test_completion_replicate_stability_stream(): + model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb" + try: + response = completion(model=model_name, messages=messages, stream=True, replicate=True) + # Add any assertions here to check the response + for result in response: + print(result) + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +def test_completion_replicate_stability(): + model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb" + try: + response = completion(model=model_name, messages=messages, replicate=True) + # Add any assertions here to check the response + for result in response: + print(result) + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index 04e92737a5..c92440dce9 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -148,6 +148,8 @@ def get_optional_params( user = "", deployment_id = None, model = None, + replicate = False, + hugging_face = False, ): optional_params = {} if model in litellm.anthropic_models: @@ -170,7 +172,12 @@ def get_optional_params( if max_tokens != float('inf'): optional_params["max_tokens"] = max_tokens return optional_params - + elif replicate == True: + # any replicate models + # TODO: handle translating remaining replicate params + if stream: + optional_params["stream"] = stream + return optional_params else:# assume passing in params for openai/azure openai if functions != []: optional_params["functions"] = functions @@ -199,6 +206,7 @@ def get_optional_params( if deployment_id != None: optional_params["deployment_id"] = deployment_id return optional_params + return optional_params def set_callbacks(callback_list): global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient @@ -557,3 +565,30 @@ def get_secret(secret_name): return os.environ.get(secret_name) else: return os.environ.get(secret_name) + +######## Streaming Class ############################ +# wraps the completion stream to return the correct format for the model +# replicate/anthropic/cohere 
+class CustomStreamWrapper: + def __init__(self, completion_stream, model): + self.model = model + if model in litellm.cohere_models: + # cohere does not return an iterator, so we need to wrap it in one + self.completion_stream = iter(completion_stream) + else: + self.completion_stream = completion_stream + + def __iter__(self): + return self + + def __next__(self): + if self.model in litellm.anthropic_models: + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk.completion}]} + elif self.model == "replicate": + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk}]} + elif self.model in litellm.cohere_models: + chunk = next(self.completion_stream) + return {"choices": [{"delta": chunk.text}]} + From e28576c835dcb711d9e2008482149174fe6496f7 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 8 Aug 2023 17:54:29 -0700 Subject: [PATCH 27/34] bump package version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c8de403b30..87d67d4fa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.364" +version = "0.1.365" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From 8e8ba0315b2a47bc8e5ee41b3163f7bbbb2e5825 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 8 Aug 2023 20:47:02 -0700 Subject: [PATCH 28/34] add helper functions for token usage calculation --- litellm/__init__.py | 19 +++++++++++++++- litellm/tests.txt | 1 - litellm/utils.py | 55 ++++++++++++++++++++++++++++++++------------- pyproject.toml | 2 +- 4 files changed, 59 insertions(+), 18 deletions(-) delete mode 100644 litellm/tests.txt diff --git a/litellm/__init__.py b/litellm/__init__.py index 01559e3d18..4c18d0e63c 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -10,8 +10,25 @@ azure_key = None anthropic_key = None replicate_key = None cohere_key = None - hugging_api_token = None + +model_cost = { + "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, + "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name + "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, + "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, + "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, + "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name + "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, + "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006}, + "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006}, + "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012}, + "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551}, + "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268}, + "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004}, +
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002}, + "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015}, +} ####### THREAD-SPECIFIC DATA ################### class MyLocal(threading.local): def __init__(self): diff --git a/litellm/tests.txt b/litellm/tests.txt deleted file mode 100644 index 4f67a836c5..0000000000 --- a/litellm/tests.txt +++ /dev/null @@ -1 +0,0 @@ -test 1 \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index c92440dce9..b47e082712 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -131,6 +131,46 @@ def client(original_function): raise e return wrapper +####### USAGE CALCULATOR ################ + +def prompt_token_calculator(model, messages): + # use tiktoken or anthropic's tokenizer depending on the model + text = " ".join(message["content"] for message in messages) + num_tokens = 0 + if "claude" in model: + install_and_import('anthropic') + from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT + anthropic = Anthropic() + num_tokens = anthropic.count_tokens(text) + else: + num_tokens = len(encoding.encode(text)) + return num_tokens + + +def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens = 0): + ## given + prompt_tokens_cost_usd_dollar = 0 + completion_tokens_cost_usd_dollar = 0 + model_cost_ref = litellm.model_cost + if model in model_cost_ref: + prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + else: + # calculate average input cost + input_cost_sum = 0 + output_cost_sum = 0 + model_cost_ref = litellm.model_cost + for model in model_cost_ref: + input_cost_sum += model_cost_ref[model]["input_cost_per_token"] + output_cost_sum += model_cost_ref[model]["output_cost_per_token"] + avg_input_cost = input_cost_sum / len(model_cost_ref.keys()) + avg_output_cost = output_cost_sum / len(model_cost_ref.keys()) + prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens + completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + + ####### HELPER FUNCTIONS ################ def get_optional_params( # 12 optional params @@ -367,21 +407,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k logging(logger_fn=user_logger_fn, exception=e) pass -def prompt_token_calculator(model, messages): - # use tiktoken or anthropic's tokenizer depending on the model - text = " ".join(message["content"] for message in messages) - num_tokens = 0 - if "claude" in model: - install_and_import('anthropic') - from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT - anthropic = Anthropic() - num_tokens = anthropic.count_tokens(text) - else: - num_tokens = len(encoding.encode(text)) - return num_tokens - - - def handle_success(args, kwargs, result, start_time, end_time): global heliconeLogger, aispendLogger try: diff --git a/pyproject.toml b/pyproject.toml index 87d67d4fa7..dc608b8411 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.365" +version = "0.1.366" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From ee6c45ca6a340930218b168befa8dbe38d8642bc Mon Sep 17 
00:00:00 2001 From: Krrish Dholakia Date: Tue, 8 Aug 2023 21:11:06 -0700 Subject: [PATCH 29/34] add token usage --- docs/token_usage.md | 45 +++++++++++++++++++++++++++++++++++++++++++++ litellm/__init__.py | 2 +- litellm/utils.py | 24 +++++++++++++++++++++--- mkdocs.yml | 2 ++ pyproject.toml | 2 +- 5 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 docs/token_usage.md diff --git a/docs/token_usage.md b/docs/token_usage.md new file mode 100644 index 0000000000..5bf2fbd3df --- /dev/null +++ b/docs/token_usage.md @@ -0,0 +1,45 @@ +# Token Usage +By default LiteLLM returns token usage in all completion requests ([See here](https://litellm.readthedocs.io/en/latest/output/)) + +However, we also expose 3 public helper functions to calculate token usage across providers: + +- `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available. + +- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json). + +- `completion_cost`: This returns the overall cost (in USD) for a given LLM API Call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both cost of input and output). + +## Example Usage + +1. `token_counter` + +```python +from litellm import token_counter + +messages = [{"role": "user", "content": "Hey, how's it going"}] +print(token_counter(model="gpt-3.5-turbo", text=messages[0]["content"])) +``` + +2. `cost_per_token` + +```python +from litellm import cost_per_token + +prompt_tokens = 5 +completion_tokens = 10 +prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model="gpt-3.5-turbo", prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) + +print(prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar) +``` + +3.
`completion_cost` + +```python +from litellm import completion_cost + +prompt = "Hey, how's it going" +completion = "Hi, I'm gpt - I am doing well" +cost_of_query = completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion) + +print(cost_of_query) +``` diff --git a/litellm/__init__.py b/litellm/__init__.py index 4c18d0e63c..9b0154dda7 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -89,7 +89,7 @@ open_ai_embedding_models = [ 'text-embedding-ada-002' ] from .timeout import timeout -from .utils import client, logging, exception_type, get_optional_params, modify_integration +from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost from .main import * # Import all the symbols from main.py from .integrations import * from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index b47e082712..b81e9bc0d5 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -133,9 +133,8 @@ def client(original_function): ####### USAGE CALCULATOR ################ -def prompt_token_calculator(model, messages): +def token_counter(model, text): # use tiktoken or anthropic's tokenizer depending on the model - text = " ".join(message["content"] for message in messages) num_tokens = 0 if "claude" in model: install_and_import('anthropic') @@ -168,9 +167,15 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens = avg_output_cost = output_cost_sum / len(model_cost_ref.keys()) prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar +def completion_cost(model="gpt-3.5-turbo", prompt="", completion=""): + prompt_tokens = token_counter(model=model, text=prompt) + completion_tokens = token_counter(model=model, text=completion) + prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model=model, prompt_tokens = prompt_tokens, completion_tokens = completion_tokens) + return prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar + ####### HELPER FUNCTIONS ################ def get_optional_params( # 12 optional params @@ -466,6 +471,19 @@ def handle_success(args, kwargs, result, start_time, end_time): print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}") pass +def prompt_token_calculator(model, messages): + # use tiktoken or anthropic's tokenizer depending on the model + text = " ".join(message["content"] for message in messages) + num_tokens = 0 + if "claude" in model: + install_and_import('anthropic') + from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT + anthropic = Anthropic() + num_tokens = anthropic.count_tokens(text) + else: + num_tokens = len(encoding.encode(text)) + return num_tokens + # integration helper function def modify_integration(integration_name, integration_params): global supabaseClient diff --git a/mkdocs.yml b/mkdocs.yml index e7326d0d67..97ed0d9ed8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,6 +6,8 @@ nav: - Input - Request Body: input.md - Output - Response Object: output.md - Streaming & Async Calls: stream.md + - token usage: + - Helper Functions: token_usage.md - 🤖 Supported LLM APIs: - Supported Completion & Chat APIs: supported.md -
Supported Embedding APIs: supported_embedding.md diff --git a/pyproject.toml b/pyproject.toml index dc608b8411..0600035ca2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.366" +version = "0.1.367" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" From 95571d1f48a7a284998a38b58cba27ddc9252782 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 9 Aug 2023 06:04:32 -0700 Subject: [PATCH 30/34] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b8884dd1e4..1a356a37bd 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ litellm manages: - guarantees consistent output, text responses will always be available at `['choices'][0]['message']['content']` # usage - +Demo - https://litellm.ai/ \ Read the docs - https://litellm.readthedocs.io/en/latest/ ## quick start From bcb7e390555abdeaeafee52b36dc894ff00eec2f Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 9 Aug 2023 06:09:28 -0700 Subject: [PATCH 31/34] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1a356a37bd..ca0e9db714 100644 --- a/README.md +++ b/README.md @@ -63,11 +63,11 @@ for chunk in result: print(chunk['choices'][0]['delta']) ``` -# hosted version -- [Grab time if you want access 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) +# support / talk with founders +- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) +- [Community Discord 💭](https://discord.gg/wuPM9dRgDw) +- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬ +- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai # why did we build this - **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere - -# Support -Contact us at ishaan@berri.ai / krrish@berri.ai From 2aa53738730c319bd7c7641efdb0485d5bd180fa Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 9 Aug 2023 06:10:00 -0700 Subject: [PATCH 32/34] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ca0e9db714..ac76d795da 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Get Support / Join the community 👉 [![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) -a simple & light package to call OpenAI, Azure, Cohere, Anthropic API Endpoints +a simple & light package to call OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints litellm manages: - translating inputs to completion and embedding endpoints From 2b84abbe27aad13df202380a6bd3cd12207a39e0 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 9 Aug 2023 06:16:19 -0700 Subject: [PATCH 33/34] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ac76d795da..14b5ecd2ce 100644 --- a/README.md +++ b/README.md @@ -7,12 +7,12 @@ Get Support / Join the community 👉 [![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) -a simple & light package to call OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints +a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints litellm manages: -- translating inputs to completion and embedding endpoints -- guarantees 
consistent output, text responses will always be available at `['choices'][0]['message']['content']` - +- translating inputs to the provider's completion and embedding endpoints +- guarantees [consistent output](https://litellm.readthedocs.io/en/latest/output/), text responses will always be available at `['choices'][0]['message']['content']` +- exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance) # usage Demo - https://litellm.ai/ \ Read the docs - https://litellm.readthedocs.io/en/latest/ From 00dc207eeecf3aca060579bf8317cf912a1a8b58 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Wed, 9 Aug 2023 06:17:08 -0700 Subject: [PATCH 34/34] Update README.md --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 14b5ecd2ce..488c211b85 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,9 @@ ![Downloads](https://img.shields.io/pypi/dm/litellm) [![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm) -Get Support / Join the community 👉 [![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) +[![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) -a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints - -litellm manages: +a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages: - translating inputs to the provider's completion and embedding endpoints - guarantees [consistent output](https://litellm.readthedocs.io/en/latest/output/), text responses will always be available at `['choices'][0]['message']['content']` - exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance)
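
The exception-mapping bullet above pairs with the `from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError` line visible in the `litellm/__init__.py` diff earlier, which shows litellm exposing the OpenAI error classes in its own namespace. Below is a minimal caller-side sketch of what that contract enables, assuming those re-exports and the `completion()` call shape shown in the patches above; the specific except branches are illustrative, not an exhaustive list of mapped errors.

```python
# Hedged sketch: assumes litellm surfaces provider failures as the OpenAI error
# classes re-exported in litellm/__init__.py (see the diff above).
from litellm import completion
from openai.error import AuthenticationError, RateLimitError, OpenAIError

messages = [{"role": "user", "content": "Hey, how's it going"}]

try:
    # The same call shape works for "gpt-3.5-turbo", "claude-2", "command-nightly", etc.
    response = completion(model="claude-instant-1", messages=messages)
    print(response['choices'][0]['message']['content'])
except AuthenticationError:
    # Missing or invalid provider API key (e.g. ANTHROPIC_API_KEY)
    print("Authentication failed - check the provider API key")
except RateLimitError:
    # Provider-side rate limiting, surfaced through the OpenAI exception type
    print("Rate limited - retry with backoff")
except OpenAIError as e:
    # Catch-all for other mapped provider errors
    print(f"Provider error: {e}")
```

This mirrors how `litellm/tests/test_completion.py` calls `completion()`, just with the errors caught and handled instead of re-raised via `pytest.fail`.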