Merge branch 'main' of github.com:vincelwt/litellm

Vince Lwt 2023-08-21 12:22:07 +02:00
commit 22c7e38de5
94 changed files with 5691 additions and 2812 deletions


@@ -8,6 +8,16 @@ jobs:
steps:
- checkout
- run:
name: Check if litellm dir was updated or if pyproject.toml was modified
command: |
if [ -n "$(git diff --name-only $CIRCLE_SHA1^..$CIRCLE_SHA1 | grep -E 'pyproject\.toml|litellm/')" ]; then
echo "litellm updated"
else
echo "No changes to litellm or pyproject.toml. Skipping tests."
circleci step halt
fi
- run:
name: Install Dependencies
command: |
@@ -15,8 +25,20 @@ jobs:
python -m pip install -r .circleci/requirements.txt
pip install infisical
pip install pytest
pip install mypy
pip install openai[datalib]
pip install -Uq chromadb==0.3.29
- run:
name: Linting Testing
command: |
cd litellm
python -m pip install types-requests types-setuptools
if ! python -m mypy . --ignore-missing-imports; then
echo "mypy detected errors"
exit 1
fi
cd ..
# Run pytest and generate JUnit XML report
@@ -77,7 +99,3 @@ workflows:
- publish_to_pypi:
requires:
- local_testing
filters:
branches:
only:
- main


@@ -1,6 +1,6 @@
# *🚅 litellm*
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.424-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
[![CircleCI](https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main)
![Downloads](https://img.shields.io/pypi/dm/litellm)
[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm)
@@ -35,13 +35,13 @@ messages = [{ "content": "Hello, how are you?","role": "user"}]
response = completion(model="gpt-3.5-turbo", messages=messages)
# cohere call
response = completion("command-nightly", messages)
response = completion(model="command-nightly", messages=messages)
```
Code Sample: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
Stable version
```
pip install litellm==0.1.345
pip install litellm==0.1.424
```
## Streaming Queries
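The hunk ends at this heading, so the streaming example itself isn't shown here; as a minimal sketch of the pattern used elsewhere in this commit (`stream=True` plus per-chunk `delta` access, as in the notebook changes below), streaming looks roughly like:
```python
from litellm import completion

# assumes OPENAI_API_KEY is set in the environment, as in the Quick Start above
messages = [{"content": "Hello, how are you?", "role": "user"}]

# With stream=True, completion returns an iterable of chunks instead of one response
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
for chunk in response:
    print(chunk["choices"][0]["delta"])  # incremental content, OpenAI-style
```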

File diff suppressed because one or more lines are too long


@@ -19,12 +19,12 @@
},
"outputs": [],
"source": [
"!pip install litellm==0.1.371"
"!pip install litellm==0.1.419"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"metadata": {
"id": "TMI3739_9q97"
},
@@ -32,7 +32,7 @@
"source": [
"import os\n",
"from litellm import completion\n",
"os.environ[\"TOGETHER_AI_TOKEN\"] = \"\" #@param\n",
"os.environ[\"TOGETHERAI_API_KEY\"] = \"\" #@param\n",
"user_message = \"Hello, whats the weather in San Francisco??\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]"
]
@@ -50,26 +50,47 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Jrrt8puj523f",
"outputId": "5a5b5beb-cda3-413e-8e83-4423d392cb44"
"outputId": "24494dea-816f-47a6-ade4-1b04f2e9085b"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \"\\n\\nI'm not able to provide real-time weather information. However, I can suggest\"}}], 'created': 1691629657.9288375, 'model': 'togethercomputer/llama-2-70b-chat', 'usage': {'prompt_tokens': 9, 'completion_tokens': 17, 'total_tokens': 26}}\n"
"{\n",
" 'choices': [\n",
"{\n",
" 'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {\n",
" 'role': 'assistant',\n",
" 'content': \"\n",
"\n",
"I'm not able to provide real-time weather information. However, I can suggest some ways for you to find out the current weather in San Francisco.\n",
"\n",
"1. Check online weather websites: There are many websites that provide up-to-date weather information, such as AccuWeather, Weather.com, or the National Weather Service. You can enter \"San Francisco\" in the search bar and get the current weather conditions, forecast, and radar imagery.\n",
"2. Use a weather app: You can download a weather app on your smartphone that provides real-time weather information. Some popular weather apps include Dark Sky, Weather Underground, and The Weather Channel.\n",
"3. Tune into local news: You can watch local news channels or listen to local radio stations to get the latest weather forecast and current conditions.\n",
"4. Check social media: Follow local weather accounts on social media platforms like Twitter or Facebook to\"\n",
"}\n",
"}\n",
" ],\n",
" 'created': 1692323365.8261144,\n",
" 'model': 'togethercomputer/llama-2-70b-chat',\n",
" 'usage': {'prompt_tokens': 9, 'completion_tokens': 176, 'total_tokens': 185}\n",
"}\n"
]
}
],
"source": [
"model_name = \"togethercomputer/llama-2-70b-chat\"\n",
"response = completion(model=model_name, messages=messages, custom_llm_provider=\"together_ai\")\n",
"response = completion(model=model_name, messages=messages, max_tokens=200)\n",
"print(response)"
]
},
@@ -85,46 +106,569 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wuBhlZtC6MH5",
"outputId": "fcb82177-6494-4963-8e37-8716d3b9e616"
"outputId": "1bedc981-4ab1-4abd-9b81-a9727223b66a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<litellm.utils.CustomStreamWrapper object at 0x7ad005e93ee0>\n",
"{'role': 'assistant', 'content': '\\\\n'}\n",
"{'role': 'assistant', 'content': '\\\\n'}\n",
"{'role': 'assistant', 'content': 'I'}\n",
"{'role': 'assistant', 'content': 'm'}\n",
"{'role': 'assistant', 'content': ' not'}\n",
"{'role': 'assistant', 'content': ' able'}\n",
"{'role': 'assistant', 'content': ' to'}\n",
"{'role': 'assistant', 'content': ' provide'}\n",
"{'role': 'assistant', 'content': ' real'}\n",
"{'role': 'assistant', 'content': '-'}\n",
"{'role': 'assistant', 'content': 'time'}\n",
"{'role': 'assistant', 'content': ' weather'}\n",
"{'role': 'assistant', 'content': ' information'}\n",
"{'role': 'assistant', 'content': '.'}\n",
"{'role': 'assistant', 'content': ' However'}\n",
"{'role': 'assistant', 'content': ','}\n",
"{'role': 'assistant', 'content': ' I'}\n",
"{'role': 'assistant', 'content': ' can'}\n"
"<async_generator object together_ai_completion_streaming at 0x7d39eeae81c0>\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Com'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'bin'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ator'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ('}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ')'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' are'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' two'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' popular'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' startup'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' acceler'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ators'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' have'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' gained'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' recognition'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' their'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' effect'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'iveness'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'urt'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'uring'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' scaling'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' early'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'stage'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' companies'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ities'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' they'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' also'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' have'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' distinct'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' differences'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' set'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' them'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' apart'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' In'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' this'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ess'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ay'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' we'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' will'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' explore'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' key'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' features'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' discuss'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' which'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' might'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' be'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' better'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fit'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' startup'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Com'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'bin'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ator'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' one'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' most'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' successful'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' startup'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' acceler'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ators'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' world'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' port'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'folio'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' includes'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Air'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'b'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'nb'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Drop'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'box'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Red'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'dit'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' F'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ounded'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' '}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '2'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '5'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' has'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fund'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ed'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' over'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' '}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '1'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '9'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' start'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ups'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' combined'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' valu'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ation'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' over'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' $'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '1'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' billion'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' The'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' known'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' its'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' inten'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'se'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' three'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'month'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' boot'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' camp'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'style'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' format'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' where'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' found'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ers'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' work'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' closely'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' experienced'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ment'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ors'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' develop'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' their'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' products'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ref'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ine'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' their'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' business'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' models'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' prepare'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fund'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ra'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ising'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 's'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' focus'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' on'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' software'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' technology'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' internet'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' start'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ups'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' has'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' strong'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' track'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' record'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ident'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ifying'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'urt'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'uring'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' successful'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' companies'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' these'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' spaces'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' on'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' other'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' hand'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' relatively'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' new'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' acceler'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ator'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' was'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' founded'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' '}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '2'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '1'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '7'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' While'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' it'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' may'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' not'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' have'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' same'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' level'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' brand'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' recognition'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' as'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' has'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' quickly'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' gained'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' reputation'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' its'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' unique'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' approach'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' startup'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' acceleration'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' The'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' focus'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'es'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' on'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' supporting'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' under'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 're'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'present'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ed'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' found'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ers'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' particularly'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' women'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' people'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' color'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' provides'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' range'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' resources'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' support'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' help'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' these'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' found'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ers'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' succeed'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 's'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' program'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' designed'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' be'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' more'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' flexible'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' personal'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ized'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' than'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' traditional'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' acceler'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ators'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' focus'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' on'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' connecting'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' found'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ers'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' ment'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ors'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' resources'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' are'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' tail'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ored'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' their'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' specific'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' needs'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'One'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' key'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' difference'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' between'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' type'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' companies'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' they'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' support'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' focus'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'es'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' primarily'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' on'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' software'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' technology'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' internet'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' start'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ups'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' while'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' has'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' bro'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ader'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' focus'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' includes'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' range'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' indust'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ries'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' such'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' as'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' health'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'care'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fin'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ance'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' consumer'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' products'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' This'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' means'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' that'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' if'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' startup'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' non'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'tech'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' industry'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' may'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' be'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' a'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' better'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fit'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '\\n'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'An'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'other'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' difference'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' between'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' two'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' programs'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' is'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' their'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' approach'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fund'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ing'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Y'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'C'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' provides'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' seed'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fund'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ing'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' all'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' its'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' port'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'folio'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' companies'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' typically'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' the'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' range'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' of'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' $'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '1'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' $'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '2'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '0'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' In'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' contrast'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' l'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ite'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'LL'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'M'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' does'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' not'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' provide'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' fund'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': 'ing'}}]}\n",
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n"
]
}
],
"source": [
"response = completion(model=model_name, messages=messages, stream=True, custom_llm_provider=\"together_ai\")\n",
"print(response)\n",
"for chunk in response:\n",
" print(chunk['choices'][0]['delta']) # same as openai format"
"user_message = \"Write 1page essay on YC + liteLLM\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"\n",
"import asyncio\n",
"async def parse_stream(stream):\n",
" async for elem in stream:\n",
" print(elem)\n",
" return\n",
"\n",
"stream = completion(model=\"togethercomputer/llama-2-70b-chat\", messages=messages, stream=True, max_tokens=800)\n",
"print(stream)\n",
"\n",
"# Await the asynchronous function directly in the notebook cell\n",
"await parse_stream(stream)\n"
]
}
],


@@ -0,0 +1,201 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
],
"metadata": {
"id": "5hwntUxTMxEk"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": {
"id": "MOhRaVnhB-0J"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
}
]
}
]
}


@@ -8,6 +8,8 @@
[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/DYqQAW?referralCode=t3ukrU)
![4BC6491E-86D0-4833-B061-9F54524B2579](https://github.com/BerriAI/litellm/assets/17561003/f5dd237b-db5e-42e1-b1ac-f05683b1d724)
## What does liteLLM proxy do
- Make `/chat/completions` requests for 50+ LLM models **Azure, OpenAI, Replicate, Anthropic, Hugging Face**
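As a rough illustration of that request shape (the base URL and port below are assumptions for a locally running proxy; the response layout follows the OpenAI format described elsewhere in this repo), a client call could look like:
```python
import requests

PROXY_BASE_URL = "http://localhost:8000"  # hypothetical local proxy address

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello, how are you?"}],
}

# The proxy exposes an OpenAI-style /chat/completions endpoint
resp = requests.post(f"{PROXY_BASE_URL}/chat/completions", json=payload)
print(resp.json()["choices"][0]["message"]["content"])
```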
@@ -156,3 +158,11 @@ This project includes a `Dockerfile` allowing you to build and deploy a Docker P
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
## Roadmap
- [ ] Support hosted db (e.g. Supabase)
- [ ] Easily send data to places like posthog and sentry.
- [ ] Add a hot-cache for project spend logs - enables fast checks for user + project limits
- [ ] Implement user-based rate-limiting
- [ ] Spending controls per project - expose key creation endpoint
- [ ] Need to store a keys db -> mapping created keys to their alias (i.e. project name)
- [ ] Easily add new models as backups / as the entry-point (add this to the available model list)

BIN dist/litellm-0.1.401-py3-none-any.whl vendored Normal file
BIN dist/litellm-0.1.401.tar.gz vendored Normal file
BIN dist/litellm-0.1.432-py3-none-any.whl vendored Normal file
BIN dist/litellm-0.1.432.tar.gz vendored Normal file
BIN dist/litellm-0.1.434-py3-none-any.whl vendored Normal file
BIN dist/litellm-0.1.434.tar.gz vendored Normal file
BIN dist/litellm-0.1.435-py3-none-any.whl vendored Normal file
BIN dist/litellm-0.1.435.tar.gz vendored Normal file


@@ -1,12 +0,0 @@
---
slug: first-blog-post
title: First Blog Post
authors:
name: Gao Wei
title: Docusaurus Core Team
url: https://github.com/wgao19
image_url: https://github.com/wgao19.png
tags: [hola, docusaurus]
---
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet


@@ -1,44 +0,0 @@
---
slug: long-blog-post
title: Long Blog Post
authors: endi
tags: [hello, docusaurus]
---
This is the summary of a very long blog post,
Use a `<!--` `truncate` `-->` comment to limit blog post size in the list view.
<!--truncate-->
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

View file

@ -1,20 +0,0 @@
---
slug: mdx-blog-post
title: MDX Blog Post
authors: [slorber]
tags: [docusaurus]
---
Blog posts support [Docusaurus Markdown features](https://docusaurus.io/docs/markdown-features), such as [MDX](https://mdxjs.com/).
:::tip
Use the power of React to create interactive blog posts.
```js
<button onClick={() => alert('button clicked!')}>Click me!</button>
```
<button onClick={() => alert('button clicked!')}>Click me!</button>
:::

Binary file not shown.

Image removed (94 KiB).

View file

@ -1,25 +1,43 @@
---
slug: welcome
title: Welcome
authors: [slorber, yangshun]
tags: [facebook, hello, docusaurus]
---
# 🚅 litellm
a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic APIs
[Docusaurus blogging features](https://docusaurus.io/docs/blog) are powered by the [blog plugin](https://docusaurus.io/docs/api/plugins/@docusaurus/plugin-content-blog).
###### litellm manages:
* Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
* Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
* Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
Simply add Markdown files (or folders) to the `blog` directory.
###### observability:
* Logging - see exactly what the raw model request/response is by plugging in your own function `completion(.., logger_fn=your_logging_fn)` and/or print statements from the package `litellm.set_verbose=True`
* Callbacks - automatically send your data to Helicone, Sentry, Posthog, Slack - `litellm.success_callbacks`, `litellm.failure_callbacks` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
Regular blog authors can be added to `authors.yml`.
## Quick Start
Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
### Installation
```
pip install litellm
```
The blog post date can be extracted from filenames, such as:
### Usage
```python
from litellm import completion
- `2019-05-30-welcome.md`
- `2019-05-30-welcome/index.md`
## set ENV variables
os.environ["OPENAI_API_KEY"] = "openai key"
os.environ["COHERE_API_KEY"] = "cohere key"
A blog post folder can be convenient to co-locate blog post images:
messages = [{ "content": "Hello, how are you?","role": "user"}]
![Docusaurus Plushie](./docusaurus-plushie-banner.jpeg)
# openai call
response = completion(model="gpt-3.5-turbo", messages=messages)
The blog supports tags as well!
# cohere call
response = completion("command-nightly", messages)
```
Need Help / Support : [see troubleshooting](https://litellm.readthedocs.io/en/latest/troubleshoot)
**And if you don't want a blog**: just delete this directory, and use `blog: false` in your Docusaurus config.
## Why did we build liteLLM
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere
## Support
* [Meet with us 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
* Contact us at ishaan@berri.ai / krrish@berri.ai

View file

@ -1,17 +0,0 @@
endi:
name: Endilie Yacop Sucipto
title: Maintainer of Docusaurus
url: https://github.com/endiliey
image_url: https://github.com/endiliey.png
yangshun:
name: Yangshun Tay
title: Front End Engineer @ Facebook
url: https://github.com/yangshun
image_url: https://github.com/yangshun.png
slorber:
name: Sébastien Lorber
title: Docusaurus maintainer
url: https://sebastienlorber.com
image_url: https://github.com/slorber.png

View file

@ -0,0 +1,42 @@
# Caching Completion() Responses
liteLLM implements exact match caching. It can be enabled by setting:
1. `litellm.caching`: When set to `True`, enables caching for all responses. Keys are the input `messages` and the value stored in the cache is the corresponding `response`
2. `litellm.caching_with_models`: When set to `True`, enables caching on a per-model basis. Keys are the input `messages + model` and the value stored in the cache is the corresponding `response`
## Usage
1. Caching - caching
Keys in the cache are the input `messages`; the following example will lead to a cache hit
```python
litellm.caching = True
# Make completion calls
response1 = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Tell me a joke."}])
response2 = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Tell me a joke."}])
# response1 == response2, response 1 is cached
# with a diff model
response3 = completion(model="command-nightly", messages=[{"role": "user", "content": "Tell me a joke."}])
# response3 == response1 == response2, since keys are messages
```
2. Caching with Models - caching_with_models
Keys in the cache are `messages + model`; the following example will not lead to a cache hit
```python
litellm.caching_with_models = True
# Make completion calls
response1 = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Tell me a joke."}])
response2 = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Tell me a joke."}])
# response1 == response2, response 1 is cached
# with a diff model, this will call the API since the key is not cached
response3 = completion(model="command-nightly", messages=[{"role": "user", "content": "Tell me a joke."}])
# response3 != response1, since keys are messages + model
```

View file

@ -1,6 +1,6 @@
# Completion Function - completion()
# Input Format - completion()
The Input params are **exactly the same** as the
<a href="https://platform.openai.com/docs/api-reference/chat/create" target="_blank" rel="noopener noreferrer">OpenAI Create chat completion</a>, and let you call **Azure OpenAI, Anthropic, Cohere, Replicate, OpenRouter** models in the same format.
<a href="https://platform.openai.com/docs/api-reference/chat/create" target="_blank" rel="noopener noreferrer">OpenAI Create chat completion</a>, and let you call Azure OpenAI, Anthropic, Cohere, Replicate, OpenRouter models in the same format.
In addition, liteLLM allows you to pass in the following **Optional** liteLLM args:
`force_timeout`, `azure`, `logger_fn`, `verbose`
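A minimal sketch of passing these optional args (assumes `OPENAI_API_KEY` is set; the timeout value and logger function are illustrative):
```python
from litellm import completion

def my_logger(model_call_dict):
    # liteLLM passes the raw model request/response details to this function
    print(model_call_dict)

messages = [{"role": "user", "content": "Hello, how are you?"}]

response = completion(
    model="gpt-3.5-turbo",
    messages=messages,
    force_timeout=60,     # seconds to wait before giving up on the provider call
    logger_fn=my_logger,  # plug in your own logging function
)
```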

View file

@ -1,12 +1,50 @@
# Completion Function - completion()
Here's the exact json output you can expect from a litellm `completion` call:
# Output Format - completion()
Here's the exact json output and type you can expect from all litellm `completion` calls for all models
```python
{'choices': [{'finish_reason': 'stop',
'index': 0,
'message': {'role': 'assistant',
'content': " I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."}}],
{
'choices': [
{
'finish_reason': str, # String: 'stop'
'index': int, # Integer: 0
'message': { # Dictionary [str, str]
'role': str, # String: 'assistant'
'content': str # String: "default message"
}
}
],
'created': float, # Float: timestamp, e.g. 1691429984.38
'model': str, # String: model name, e.g. 'claude-instant-1'
'usage': { # Dictionary [str, int]
'prompt_tokens': int, # Integer
'completion_tokens': int, # Integer
'total_tokens': int # Integer
}
}
```
You can access the response as a dictionary or as a class object, just as the OpenAI SDK allows you to
```python
print(response.choices[0].message.content)
print(response['choices'][0]['message']['content'])
```
Here's what an example response looks like
```python
{
'choices': [
{
'finish_reason': 'stop',
'index': 0,
'message': {
'role': 'assistant',
'content': " I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
}
}
],
'created': 1691429984.3852863,
'model': 'claude-instant-1',
'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41}}
'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41}
}
```

View file

@ -1,4 +1,12 @@
# Generation/Completion/Chat Completion Models
# Supported Chat, Completion Models
## API Keys
liteLLM reads provider keys from environment variables; all keys should be named in the following format:
`<PROVIDER>_API_KEY`, for example (a short sketch follows this list)
* `OPENAI_API_KEY` Provider = OpenAI
* `TOGETHERAI_API_KEY` Provider = TogetherAI
* `HUGGINGFACE_API_KEY` Provider = HuggingFace
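A minimal sketch of setting provider keys via environment variables (the key values below are placeholders):
```python
import os

# placeholder values; use your real provider keys
os.environ["OPENAI_API_KEY"] = "sk-..."        # Provider = OpenAI
os.environ["TOGETHERAI_API_KEY"] = "..."       # Provider = TogetherAI
os.environ["HUGGINGFACE_API_KEY"] = "hf_..."   # Provider = HuggingFace
```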
### OpenAI Chat Completion Models
@ -49,6 +57,7 @@ VertexAI requires you to set `application_default_credentials.json`, this can be
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|--------------------------------------|
| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
### Hugging Face Inference API
@ -64,10 +73,10 @@ Here are some examples of supported models:
| Model Name | Function Call | Required OS Variables |
|------------------|-------------------------------------------------------------------------------------|--------------------------------------|
| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
### AI21 Models
| Model Name | Function Call | Required OS Variables |
@ -82,9 +91,24 @@ Here are some examples of supported models:
|------------------|--------------------------------------------|--------------------------------------|
| command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` |
### BaseTen Models
### Together AI Models
liteLLM supports `non-streaming` and `streaming` requests to all models on https://api.together.xyz/
Example TogetherAI Usage - Note: liteLLM supports all models deployed on TogetherAI; a usage sketch follows the table below
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/llama-2-70b-chat | `completion('togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/LLaMA-2-13b-chat | `completion('togethercomputer/LLaMA-2-13b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/code-and-talk-v1 | `completion('togethercomputer/code-and-talk-v1', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/creative-v1 | `completion('togethercomputer/creative-v1', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/yourmodel | `completion('togethercomputer/yourmodel', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
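A minimal usage sketch, assuming `TOGETHERAI_API_KEY` is set (the key value below is a placeholder):
```python
import os
from litellm import completion

os.environ["TOGETHERAI_API_KEY"] = "..."  # placeholder; use your TogetherAI key

messages = [{"content": "Hello, whats the weather in San Francisco??", "role": "user"}]

# non-streaming call
response = completion(model="togethercomputer/llama-2-70b-chat", messages=messages)
print(response["choices"][0]["message"]["content"])

# streaming call
for chunk in completion(model="togethercomputer/llama-2-70b-chat", messages=messages, stream=True):
    print(chunk["choices"][0]["delta"])
```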
### Baseten Models
Baseten provides infrastructure to deploy and serve ML models https://www.baseten.co/. Use liteLLM to easily call models deployed on Baseten.
Example Baseten Usage - Note: liteLLM supports all models deployed on Baseten
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|------------------------------------|
@ -99,13 +123,37 @@ All the text models from [OpenRouter](https://openrouter.ai/docs) are supported
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|--------------------------------------|
| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
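A minimal sketch for an OpenRouter call, assuming the three environment variables from the table above are set (values below are placeholders):
```python
import os
from litellm import completion

# placeholder values for the required OpenRouter variables
os.environ["OR_SITE_URL"] = "https://example.com"
os.environ["OR_APP_NAME"] = "my-app"
os.environ["OPENROUTER_API_KEY"] = "..."

messages = [{"content": "Hello, how are you?", "role": "user"}]
response = completion(model="openai/gpt-3.5-turbo", messages=messages)
print(response["choices"][0]["message"]["content"])
```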
### Petals Models
Supported models on https://chat.petals.dev/
| Model Name | Function Call | Required OS Variables |
|----------------------|------------------------------------------------------------------------|--------------------------------|
| stabilityai/StableBeluga2 | `completion(model='stabilityai/StableBeluga2', messages, custom_llm_provider="petals")` | No API Key required |
| enoch/llama-65b-hf | `completion(model='enoch/llama-65b-hf', messages, custom_llm_provider="petals")` | No API Key required |
| bigscience/bloomz | `completion(model='bigscience/bloomz', messages, custom_llm_provider="petals")` | No API Key required |
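A minimal sketch for a Petals call; per the table above, no API key is required, though the request runs on the public Petals swarm:
```python
from litellm import completion

messages = [{"content": "Hello, how are you?", "role": "user"}]

# no API key required for Petals models
response = completion(
    model="stabilityai/StableBeluga2",
    messages=messages,
    custom_llm_provider="petals",
)
print(response["choices"][0]["message"]["content"])
```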
### Ollama Models
Ollama supported models: https://github.com/jmorganca/ollama (a streaming usage sketch follows the table below)
| Model Name | Function Call | Required OS Variables |
|----------------------|-----------------------------------------------------------------------------------|--------------------------------|
| Llama2 7B | `completion(model='llama2', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Llama2 13B | `completion(model='llama2:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Llama2 70B | `completion(model='llama2:70b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Llama2 Uncensored | `completion(model='llama2-uncensored', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Orca Mini | `completion(model='orca-mini', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Vicuna | `completion(model='vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Nous-Hermes | `completion(model='nous-hermes', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Nous-Hermes 13B | `completion(model='nous-hermes:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
| Wizard Vicuna Uncensored | `completion(model='wizard-vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
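A minimal streaming sketch, assuming an Ollama server is already running locally on the default port:
```python
from litellm import completion

messages = [{"content": "Hello, how are you?", "role": "user"}]

# assumes `ollama serve` is running at http://localhost:11434
response = completion(
    model="llama2",
    messages=messages,
    custom_api_base="http://localhost:11434",
    custom_llm_provider="ollama",
    stream=True,
)
for chunk in response:
    print(chunk["choices"][0]["delta"])
```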

View file

@ -1,30 +1,32 @@
# 🚅 litellm
# litellm
a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic APIs
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
[![CircleCI](https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main)
![Downloads](https://img.shields.io/pypi/dm/litellm)
[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm)
###### litellm manages:
[![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw)
- Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
- Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
- Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages:
###### observability:
- translating inputs to the provider's completion and embedding endpoints
- guarantees [consistent output](https://litellm.readthedocs.io/en/latest/output/), text responses will always be available at `['choices'][0]['message']['content']`
- exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance)
- Logging - see exactly what the raw model request/response is by plugging in your own function `completion(.., logger_fn=your_logging_fn)` and/or print statements from the package `litellm.set_verbose=True`
- Callbacks - automatically send your data to Helicone, LLMonitor, Sentry, Posthog, Slack - `litellm.success_callbacks`, `litellm.failure_callbacks` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
# usage
## Quick Start
<a href='https://docs.litellm.ai/docs/completion/supported' target="_blank"><img alt='None' src='https://img.shields.io/badge/Supported_LLMs-100000?style=for-the-badge&logo=None&logoColor=000000&labelColor=000000&color=8400EA'/></a>
Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
Demo - https://litellm.ai/playground \
Read the docs - https://docs.litellm.ai/docs/
### Installation
## quick start
```
pip install litellm
```
### Usage
```python
from litellm import completion
@ -41,13 +43,37 @@ response = completion(model="gpt-3.5-turbo", messages=messages)
response = completion("command-nightly", messages)
```
Need Help / Support : [see troubleshooting](https://litellm.readthedocs.io/en/latest/troubleshoot)
Code Sample: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
## Why did we build liteLLM
Stable version
```
pip install litellm==0.1.345
```
## Streaming Queries
liteLLM supports streaming the model response back; pass `stream=True` to get a streaming iterator in the response.
Streaming is supported for OpenAI, Azure, Anthropic, Huggingface models
```python
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
for chunk in response:
print(chunk['choices'][0]['delta'])
# claude 2
result = completion('claude-2', messages, stream=True)
for chunk in result:
print(chunk['choices'][0]['delta'])
```
# support / talk with founders
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
# why did we build this
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere
## Support
- [Meet with us 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- Contact us at ishaan@berri.ai / krrish@berri.ai

View file

@ -22,11 +22,13 @@ create table
messages json null default '{}'::json,
response json null default '{}'::json,
end_user text null default ''::text,
status text null default ''::text,
error json null default '{}'::json,
response_time real null default '0'::real,
total_cost real null,
additional_details json null default '{}'::json,
constraint request_logs_pkey primary key (id)
litellm_call_id text unique,
primary key (id)
) tablespace pg_default;
```

View file

@ -6,8 +6,9 @@ const darkCodeTheme = require('prism-react-renderer/themes/dracula');
/** @type {import('@docusaurus/types').Config} */
const config = {
title: 'LiteLLM',
title: 'liteLLM',
tagline: 'Simplify LLM API Calls',
favicon: '/img/favicon.ico',
// Set the production url of your site here
url: 'https://litellm.vercel.app/',
@ -80,35 +81,27 @@ const config = {
{
title: 'Community',
items: [
{
label: 'Stack Overflow',
href: 'https://stackoverflow.com/questions/tagged/docusaurus',
},
{
label: 'Discord',
href: 'https://discordapp.com/invite/docusaurus',
href: 'https://discord.com/invite/wuPM9dRgDw',
},
{
label: 'Twitter',
href: 'https://twitter.com/docusaurus',
href: 'https://twitter.com/LiteLLM',
},
],
},
{
title: 'More',
items: [
{
label: 'Blog',
to: '/blog',
},
{
label: 'GitHub',
href: 'https://github.com/facebook/docusaurus',
href: 'https://github.com/BerriAI/litellm/',
},
],
},
],
copyright: `Copyright © ${new Date().getFullYear()} My Project, Inc. Built with Docusaurus.`,
copyright: `Copyright © ${new Date().getFullYear()} liteLLM`,
},
prism: {
theme: lightCodeTheme,

docs/my-website/index.md Normal file
View file

@ -0,0 +1,25 @@
---
slug: welcome
title: Welcome
authors: [slorber, yangshun]
tags: [facebook, hello, docusaurus]
---
[Docusaurus blogging features](https://docusaurus.io/docs/blog) are powered by the [blog plugin](https://docusaurus.io/docs/api/plugins/@docusaurus/plugin-content-blog).
Simply add Markdown files (or folders) to the `blog` directory.
Regular blog authors can be added to `authors.yml`.
The blog post date can be extracted from filenames, such as:
- `2019-05-30-welcome.md`
- `2019-05-30-welcome/index.md`
A blog post folder can be convenient to co-locate blog post images:
![Docusaurus Plushie](./docusaurus-plushie-banner.jpeg)
The blog supports tags as well!
**And if you don't want a blog**: just delete this directory, and use `blog: false` in your Docusaurus config.

View file

@ -21,14 +21,15 @@ const sidebars = {
'index',
{
type: 'category',
label: 'completion_function',
items: ['completion/input', 'completion/supported','completion/output'],
label: 'Completion()',
items: ['completion/input','completion/output'],
},
{
type: 'category',
label: 'embedding_function',
label: 'Embedding()',
items: ['embedding/supported_embedding'],
},
'completion/supported',
{
type: 'category',
label: 'Tutorials',
@ -37,6 +38,7 @@ const sidebars = {
'token_usage',
'stream',
'secret',
'caching',
{
type: 'category',
label: 'Logging & Observability',

View file

@ -1,23 +1,27 @@
# 🚅 litellm
a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic APIs
# *🚅 litellm*
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
[![CircleCI](https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main)
![Downloads](https://img.shields.io/pypi/dm/litellm)
[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm)
###### litellm manages:
* Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
* Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
* Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
[![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw)
###### observability:
* Logging - see exactly what the raw model request/response is by plugging in your own function `completion(.., logger_fn=your_logging_fn)` and/or print statements from the package `litellm.set_verbose=True`
* Callbacks - automatically send your data to Helicone, Sentry, Posthog, Slack - `litellm.success_callbacks`, `litellm.failure_callbacks` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages:
- translating inputs to the provider's completion and embedding endpoints
- guarantees [consistent output](https://litellm.readthedocs.io/en/latest/output/), text responses will always be available at `['choices'][0]['message']['content']`
- exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance)
# usage
<a href='https://docs.litellm.ai/docs/completion/supported' target="_blank"><img alt='None' src='https://img.shields.io/badge/Supported_LLMs-100000?style=for-the-badge&logo=None&logoColor=000000&labelColor=000000&color=8400EA'/></a>
## Quick Start
Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
### Installation
Demo - https://litellm.ai/playground \
Read the docs - https://docs.litellm.ai/docs/
## quick start
```
pip install litellm
```
### Usage
```python
from litellm import completion
@ -33,11 +37,32 @@ response = completion(model="gpt-3.5-turbo", messages=messages)
# cohere call
response = completion("command-nightly", messages)
```
Need Help / Support : [see troubleshooting](https://litellm.readthedocs.io/en/latest/troubleshoot)
Code Sample: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
## Why did we build liteLLM
Stable version
```
pip install litellm==0.1.345
```
## Streaming Queries
liteLLM supports streaming the model response back; pass `stream=True` to get a streaming iterator in the response.
Streaming is supported for OpenAI, Azure, Anthropic, Huggingface models
```python
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
for chunk in response:
print(chunk['choices'][0]['delta'])
# claude 2
result = completion('claude-2', messages, stream=True)
for chunk in result:
print(chunk['choices'][0]['delta'])
```
# support / talk with founders
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
# why did we build this
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere
## Support
* [Meet with us 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
* Contact us at ishaan@berri.ai / krrish@berri.ai

Binary file not shown.

Image updated (3.5 KiB before, 15 KiB after).

View file

@ -1,50 +1,120 @@
import threading
success_callback = []
failure_callback = []
set_verbose=False
telemetry=True
max_tokens = 256 # OpenAI Defaults
from typing import Callable, List, Optional
input_callback: List[str] = []
success_callback: List[str] = []
failure_callback: List[str] = []
set_verbose = False
telemetry = True
max_tokens = 256 # OpenAI Defaults
retry = True
api_key = None
openai_key = None
azure_key = None
anthropic_key = None
replicate_key = None
cohere_key = None
openrouter_key = None
huggingface_key = None
vertex_project = None
vertex_location = None
hugging_api_token = None
api_key: Optional[str] = None
openai_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
replicate_key: Optional[str] = None
cohere_key: Optional[str] = None
openrouter_key: Optional[str] = None
huggingface_key: Optional[str] = None
vertex_project: Optional[str] = None
vertex_location: Optional[str] = None
hugging_api_token: Optional[str] = None
togetherai_api_key: Optional[str] = None
caching = False
caching_with_models = False # if you want the caching key to be model + prompt
model_cost = {
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
"gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
"gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
"claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
"claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
"text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
"command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
"gpt-3.5-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-35-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
}, # azure model name
"gpt-3.5-turbo-0613": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-35-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
}, # azure model name
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-4": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-0613": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-32k": {
"max_tokens": 8000,
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
},
"claude-instant-1": {
"max_tokens": 100000,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551,
},
"claude-2": {
"max_tokens": 100000,
"input_cost_per_token": 0.00001102,
"output_cost_per_token": 0.00003268,
},
"text-bison-001": {
"max_tokens": 8192,
"input_cost_per_token": 0.000004,
"output_cost_per_token": 0.000004,
},
"chat-bison-001": {
"max_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002,
},
"command-nightly": {
"max_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015,
},
}
####### THREAD-SPECIFIC DATA ###################
class MyLocal(threading.local):
def __init__(self):
self.user = "Hello World"
_thread_context = MyLocal()
def identify(event_details):
# Store user in thread local data
if "user" in event_details:
_thread_context.user = event_details["user"]
####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
api_base = None
headers = None
@ -55,60 +125,48 @@ config_path = None
secret_manager_client = None
####### COMPLETION MODELS ###################
open_ai_chat_completion_models = [
"gpt-4",
"gpt-4-0613",
"gpt-4-32k",
"gpt-4-32k-0613",
#################
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
]
open_ai_text_completion_models = [
'text-davinci-003'
"gpt-4",
"gpt-4-0613",
"gpt-4-32k",
"gpt-4-32k-0613",
#################
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
]
open_ai_text_completion_models = ["text-davinci-003"]
cohere_models = [
'command-nightly',
"command",
"command-light",
"command-medium-beta",
"command-xlarge-beta"
"command-nightly",
"command",
"command-light",
"command-medium-beta",
"command-xlarge-beta",
]
anthropic_models = [
"claude-2",
"claude-instant-1",
"claude-instant-1.2"
]
anthropic_models = ["claude-2", "claude-instant-1", "claude-instant-1.2"]
replicate_models = [
"replicate/"
] # placeholder, to make sure we accept any replicate model in our model_list
] # placeholder, to make sure we accept any replicate model in our model_list
openrouter_models = [
'google/palm-2-codechat-bison',
'google/palm-2-chat-bison',
'openai/gpt-3.5-turbo',
'openai/gpt-3.5-turbo-16k',
'openai/gpt-4-32k',
'anthropic/claude-2',
'anthropic/claude-instant-v1',
'meta-llama/llama-2-13b-chat',
'meta-llama/llama-2-70b-chat'
"google/palm-2-codechat-bison",
"google/palm-2-chat-bison",
"openai/gpt-3.5-turbo",
"openai/gpt-3.5-turbo-16k",
"openai/gpt-4-32k",
"anthropic/claude-2",
"anthropic/claude-instant-v1",
"meta-llama/llama-2-13b-chat",
"meta-llama/llama-2-70b-chat",
]
vertex_chat_models = [
"chat-bison",
"chat-bison@001"
]
vertex_chat_models = ["chat-bison", "chat-bison@001"]
vertex_text_models = [
"text-bison",
"text-bison@001"
]
vertex_text_models = ["text-bison", "text-bison@001"]
huggingface_models = [
"meta-llama/Llama-2-7b-hf",
@ -123,24 +181,56 @@ huggingface_models = [
"meta-llama/Llama-2-13b-chat",
"meta-llama/Llama-2-70b",
"meta-llama/Llama-2-70b-chat",
] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/completion/supported
] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/completion/supported
ai21_models = [
"j2-ultra",
"j2-mid",
"j2-light"
ai21_models = ["j2-ultra", "j2-mid", "j2-light"]
model_list = (
open_ai_chat_completion_models
+ open_ai_text_completion_models
+ cohere_models
+ anthropic_models
+ replicate_models
+ openrouter_models
+ huggingface_models
+ vertex_chat_models
+ vertex_text_models
+ ai21_models
)
provider_list = [
"openai",
"cohere",
"anthropic",
"replicate",
"huggingface",
"together_ai",
"openrouter",
"vertex_ai",
"ai21",
]
model_list = open_ai_chat_completion_models + open_ai_text_completion_models + cohere_models + anthropic_models + replicate_models + openrouter_models + huggingface_models + vertex_chat_models + vertex_text_models + ai21_models
####### EMBEDDING MODELS ###################
open_ai_embedding_models = [
'text-embedding-ada-002'
]
open_ai_embedding_models = ["text-embedding-ada-002"]
from .timeout import timeout
from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost, load_test_model, get_litellm_params
from .main import * # Import all the symbols from main.py
from .testing import *
from .utils import (
client,
exception_type,
get_optional_params,
modify_integration,
token_counter,
cost_per_token,
completion_cost,
get_litellm_params,
Logging
)
from .main import * # type: ignore
from .integrations import *
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
from openai.error import (
AuthenticationError,
InvalidRequestError,
RateLimitError,
ServiceUnavailableError,
OpenAIError,
)

litellm/exceptions.py Normal file
View file

@ -0,0 +1,62 @@
## LiteLLM versions of the OpenAI Exception Types
from openai.error import (
AuthenticationError,
InvalidRequestError,
RateLimitError,
ServiceUnavailableError,
OpenAIError,
)
class AuthenticationError(AuthenticationError): # type: ignore
def __init__(self, message, llm_provider):
self.status_code = 401
self.message = message
self.llm_provider = llm_provider
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class InvalidRequestError(InvalidRequestError): # type: ignore
def __init__(self, message, model, llm_provider):
self.status_code = 400
self.message = message
self.model = model
self.llm_provider = llm_provider
super().__init__(
self.message, f"{self.model}"
) # Call the base class constructor with the parameters it needs
class RateLimitError(RateLimitError): # type: ignore
def __init__(self, message, llm_provider):
self.status_code = 429
self.message = message
self.llm_provider = llm_provider
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class ServiceUnavailableError(ServiceUnavailableError): # type: ignore
def __init__(self, message, llm_provider):
self.status_code = 500
self.message = message
self.llm_provider = llm_provider
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class OpenAIError(OpenAIError): # type: ignore
def __init__(self, original_exception):
self.status_code = original_exception.http_status
super().__init__(
http_body=original_exception.http_body,
http_status=original_exception.http_status,
json_body=original_exception.json_body,
headers=original_exception.headers,
code=original_exception.code,
)
self.llm_provider = "openai"
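A minimal sketch of how these mapped exceptions could be caught by a caller, assuming they are importable from `litellm.exceptions` as defined above and that a provider key such as `OPENAI_API_KEY` is set:
```python
from litellm import completion
from litellm.exceptions import AuthenticationError, RateLimitError

messages = [{"content": "Hello, how are you?", "role": "user"}]

try:
    response = completion(model="gpt-3.5-turbo", messages=messages)
except AuthenticationError as e:
    # provider 401 errors are mapped to this type
    print(f"auth failed for {e.llm_provider}: {e.message}")
except RateLimitError as e:
    # provider 429 errors are mapped to this type
    print(f"rate limited by {e.llm_provider}: {e.message}")
```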

View file

@ -1 +1 @@
from . import *
from . import *

View file

@ -1,53 +1,121 @@
#### What this does ####
# On success + failure, log events to aispend.io
# On success + failure, log events to aispend.io
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
import datetime
model_cost = {
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
"gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
"gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
"claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
"claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
"text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
"command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
"gpt-3.5-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-35-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
}, # azure model name
"gpt-3.5-turbo-0613": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-35-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
}, # azure model name
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-4": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-0613": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-32k": {
"max_tokens": 8000,
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
},
"claude-instant-1": {
"max_tokens": 100000,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551,
},
"claude-2": {
"max_tokens": 100000,
"input_cost_per_token": 0.00001102,
"output_cost_per_token": 0.00003268,
},
"text-bison-001": {
"max_tokens": 8192,
"input_cost_per_token": 0.000004,
"output_cost_per_token": 0.000004,
},
"chat-bison-001": {
"max_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002,
},
"command-nightly": {
"max_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015,
},
}
class AISpendLogger:
# Class variables or attributes
def __init__(self):
# Instance variables
self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
self.api_key = os.getenv("AISPEND_API_KEY")
def price_calculator(self, model, response_obj, start_time, end_time):
# try and find if the model is in the model_cost map
# else default to the average of the costs
prompt_tokens_cost_usd_dollar = 0
completion_tokens_cost_usd_dollar = 0
if model in model_cost:
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
elif "replicate" in model:
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
elif "replicate" in model:
# replicate models are charged based on time
# llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
model_run_time = end_time - start_time # assuming time in seconds
model_run_time = end_time - start_time # assuming time in seconds
cost_usd_dollar = model_run_time * 0.0032
prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
else:
# calculate average input cost
# calculate average input cost
input_cost_sum = 0
output_cost_sum = 0
for model in model_cost:
@ -55,37 +123,52 @@ class AISpendLogger:
output_cost_sum += model_cost[model]["output_cost_per_token"]
avg_input_cost = input_cost_sum / len(model_cost.keys())
avg_output_cost = output_cost_sum / len(model_cost.keys())
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
def log_event(self, model, response_obj, start_time, end_time, print_verbose):
# Method definition
try:
print_verbose(f"AISpend Logging - Enters logging function for model {model}")
print_verbose(
f"AISpend Logging - Enters logging function for model {model}"
)
url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
headers = {
'Authorization': f'Bearer {self.api_key}',
'Content-Type': 'application/json'
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
response_timestamp = datetime.datetime.fromtimestamp(int(response_obj["created"])).strftime('%Y-%m-%d')
response_timestamp = datetime.datetime.fromtimestamp(
int(response_obj["created"])
).strftime("%Y-%m-%d")
prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
(
prompt_tokens_cost_usd_dollar,
completion_tokens_cost_usd_dollar,
) = self.price_calculator(model, response_obj, start_time, end_time)
prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
data = [{
"requests": 1,
"requests_context": 1,
"context_tokens": response_obj["usage"]["prompt_tokens"],
"requests_generated": 1,
"generated_tokens": response_obj["usage"]["completion_tokens"],
"recorded_date": response_timestamp,
"model_id": response_obj["model"],
"generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
"context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent
}]
data = [
{
"requests": 1,
"requests_context": 1,
"context_tokens": response_obj["usage"]["prompt_tokens"],
"requests_generated": 1,
"generated_tokens": response_obj["usage"]["completion_tokens"],
"recorded_date": response_timestamp,
"model_id": response_obj["model"],
"generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
"context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
}
]
print_verbose(f"AISpend Logging - final data object: {data}")
except:

View file

@ -1,52 +1,120 @@
#### What this does ####
# On success + failure, log events to aispend.io
# On success + failure, log events to aispend.io
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
import datetime
model_cost = {
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
"gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
"gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
"claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
"claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
"text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
"command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
"gpt-3.5-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-35-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
}, # azure model name
"gpt-3.5-turbo-0613": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-35-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
}, # azure model name
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-4": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-0613": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-32k": {
"max_tokens": 8000,
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
},
"claude-instant-1": {
"max_tokens": 100000,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551,
},
"claude-2": {
"max_tokens": 100000,
"input_cost_per_token": 0.00001102,
"output_cost_per_token": 0.00003268,
},
"text-bison-001": {
"max_tokens": 8192,
"input_cost_per_token": 0.000004,
"output_cost_per_token": 0.000004,
},
"chat-bison-001": {
"max_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002,
},
"command-nightly": {
"max_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015,
},
}
class BerriSpendLogger:
# Class variables or attributes
def __init__(self):
# Instance variables
self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")
def price_calculator(self, model, response_obj, start_time, end_time):
# try and find if the model is in the model_cost map
# else default to the average of the costs
prompt_tokens_cost_usd_dollar = 0
completion_tokens_cost_usd_dollar = 0
if model in model_cost:
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
elif "replicate" in model:
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
elif "replicate" in model:
# replicate models are charged based on time
# llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
model_run_time = end_time - start_time # assuming time in seconds
model_run_time = end_time - start_time # assuming time in seconds
cost_usd_dollar = model_run_time * 0.0032
prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
else:
# calculate average input cost
# calculate average input cost
input_cost_sum = 0
output_cost_sum = 0
for model in model_cost:
@ -54,42 +122,59 @@ class BerriSpendLogger:
output_cost_sum += model_cost[model]["output_cost_per_token"]
avg_input_cost = input_cost_sum / len(model_cost.keys())
avg_output_cost = output_cost_sum / len(model_cost.keys())
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
def log_event(self, model, messages, response_obj, start_time, end_time, print_verbose):
def log_event(
self, model, messages, response_obj, start_time, end_time, print_verbose
):
# Method definition
try:
print_verbose(f"BerriSpend Logging - Enters logging function for model {model}")
print_verbose(
f"BerriSpend Logging - Enters logging function for model {model}"
)
url = f"https://berrispend.berri.ai/spend"
headers = {
'Content-Type': 'application/json'
}
headers = {"Content-Type": "application/json"}
prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
(
prompt_tokens_cost_usd_dollar,
completion_tokens_cost_usd_dollar,
) = self.price_calculator(model, response_obj, start_time, end_time)
total_cost = (
prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
)
response_time = (end_time-start_time).total_seconds()
response_time = (end_time - start_time).total_seconds()
if "response" in response_obj:
data = [{
"response_time": response_time,
"model_id": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"response": response_obj['choices'][0]['message']['content'],
"account_id": self.account_id
}]
data = [
{
"response_time": response_time,
"model_id": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"response": response_obj["choices"][0]["message"]["content"],
"account_id": self.account_id,
}
]
elif "error" in response_obj:
data = [{
"response_time": response_time,
"model_id": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"error": response_obj['error'],
"account_id": self.account_id
}]
data = [
{
"response_time": response_time,
"model_id": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"error": response_obj["error"],
"account_id": self.account_id,
}
]
print_verbose(f"BerriSpend Logging - final data object: {data}")
response = requests.post(url, headers=headers, json=data)
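Editor's note: a minimal, self-contained sketch of the per-token pricing math that price_calculator applies above when the model is found in model_cost. The rates mirror entries from the model_cost map shown later in this diff; the usage numbers are hypothetical.

# Illustrative per-token rates, same shape as the model_cost map in this diff.
model_cost = {
    "gpt-3.5-turbo": {"input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
}
usage = {"prompt_tokens": 9, "completion_tokens": 17}  # hypothetical usage block from a response

prompt_cost = model_cost["gpt-3.5-turbo"]["input_cost_per_token"] * usage["prompt_tokens"]
completion_cost = model_cost["gpt-3.5-turbo"]["output_cost_per_token"] * usage["completion_tokens"]
print(prompt_cost + completion_cost)  # roughly 4.75e-05 USD for this call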

View file

@@ -2,19 +2,24 @@
# On success, logs events to Helicone
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
class HeliconeLogger:
# Class variables or attributes
helicone_model_list = ["gpt", "claude"]
def __init__(self):
# Instance variables
self.provider_url = "https://api.openai.com/v1"
self.key = os.getenv('HELICONE_API_KEY')
self.key = os.getenv("HELICONE_API_KEY")
def claude_mapping(self, model, messages, response_obj):
from anthropic import HUMAN_PROMPT, AI_PROMPT
prompt = f"{HUMAN_PROMPT}"
prompt = f"{HUMAN_PROMPT}"
for message in messages:
if "role" in message:
if message["role"] == "user":
@@ -26,48 +31,84 @@ class HeliconeLogger:
prompt += f"{AI_PROMPT}"
claude_provider_request = {"model": model, "prompt": prompt}
claude_response_obj = {"completion": response_obj['choices'][0]['message']['content'], "model": model, "stop_reason": "stop_sequence"}
claude_response_obj = {
"completion": response_obj["choices"][0]["message"]["content"],
"model": model,
"stop_reason": "stop_sequence",
}
return claude_provider_request, claude_response_obj
def log_success(self, model, messages, response_obj, start_time, end_time, print_verbose):
def log_success(
self, model, messages, response_obj, start_time, end_time, print_verbose
):
# Method definition
try:
print_verbose(f"Helicone Logging - Enters logging function for model {model}")
model = model if any(accepted_model in model for accepted_model in self.helicone_model_list) else "gpt-3.5-turbo"
print_verbose(
f"Helicone Logging - Enters logging function for model {model}"
)
model = (
model
if any(
accepted_model in model
for accepted_model in self.helicone_model_list
)
else "gpt-3.5-turbo"
)
provider_request = {"model": model, "messages": messages}
if "claude" in model:
provider_request, response_obj = self.claude_mapping(model=model, messages=messages, response_obj=response_obj)
if "claude" in model:
provider_request, response_obj = self.claude_mapping(
model=model, messages=messages, response_obj=response_obj
)
providerResponse = {
"json": response_obj,
"headers": {"openai-version": "2020-10-01"},
"status": 200
"json": response_obj,
"headers": {"openai-version": "2020-10-01"},
"status": 200,
}
# Code to be executed
url = "https://api.hconeai.com/oai/v1/log"
headers = {
'Authorization': f'Bearer {self.key}',
'Content-Type': 'application/json'
"Authorization": f"Bearer {self.key}",
"Content-Type": "application/json",
}
start_time_seconds = int(start_time.timestamp())
start_time_milliseconds = int((start_time.timestamp() - start_time_seconds) * 1000)
start_time_milliseconds = int(
(start_time.timestamp() - start_time_seconds) * 1000
)
end_time_seconds = int(end_time.timestamp())
end_time_milliseconds = int((end_time.timestamp() - end_time_seconds) * 1000)
end_time_milliseconds = int(
(end_time.timestamp() - end_time_seconds) * 1000
)
data = {
"providerRequest": {"url": self.provider_url, "json": provider_request, "meta": {"Helicone-Auth": f"Bearer {self.key}"}},
"providerRequest": {
"url": self.provider_url,
"json": provider_request,
"meta": {"Helicone-Auth": f"Bearer {self.key}"},
},
"providerResponse": providerResponse,
"timing": {"startTime": {"seconds": start_time_seconds, "milliseconds": start_time_milliseconds}, "endTime": {"seconds": end_time_seconds, "milliseconds": end_time_milliseconds}} # {"seconds": .., "milliseconds": ..}
"timing": {
"startTime": {
"seconds": start_time_seconds,
"milliseconds": start_time_milliseconds,
},
"endTime": {
"seconds": end_time_seconds,
"milliseconds": end_time_milliseconds,
},
}, # {"seconds": .., "milliseconds": ..}
}
response = requests.post(url, headers=headers, json=data)
if response.status_code == 200:
print_verbose("Helicone Logging - Success!")
else:
print_verbose(f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}")
print_verbose(
f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
)
print_verbose(f"Helicone Logging - Error {response.text}")
except:
# traceback.print_exc()
print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
pass
pass
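Editor's note: the Helicone timing payload above splits each timestamp into whole seconds plus a millisecond remainder. A small hedged sketch of that conversion, assuming Python datetime objects; the timestamps are made up.

import datetime

start_time = datetime.datetime(2023, 8, 21, 12, 0, 0, 250000)  # hypothetical call start
end_time = datetime.datetime(2023, 8, 21, 12, 0, 2, 750000)    # hypothetical call end

start_time_seconds = int(start_time.timestamp())
start_time_milliseconds = int((start_time.timestamp() - start_time_seconds) * 1000)  # 250
end_time_seconds = int(end_time.timestamp())
end_time_milliseconds = int((end_time.timestamp() - end_time_seconds) * 1000)        # 750

timing = {
    "startTime": {"seconds": start_time_seconds, "milliseconds": start_time_milliseconds},
    "endTime": {"seconds": end_time_seconds, "milliseconds": end_time_milliseconds},
}
print(timing)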

View file

@@ -0,0 +1,74 @@
import requests, traceback, json
class LiteDebugger:
def __init__(self):
self.api_url = "https://api.litellm.ai/debugger"
pass
def input_log_event(self, model, messages, end_user, litellm_call_id, print_verbose):
try:
print_verbose(
f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
)
litellm_data_obj = {
"model": model,
"messages": messages,
"end_user": end_user,
"status": "initiated",
"litellm_call_id": litellm_call_id
}
response = requests.post(url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj))
print_verbose(f"LiteDebugger: api response - {response.text}")
except:
print_verbose(f"LiteDebugger: Logging Error - {traceback.format_exc()}")
pass
def log_event(self, model,
messages,
end_user,
response_obj,
start_time,
end_time,
litellm_call_id,
print_verbose,):
try:
print_verbose(
f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
)
total_cost = 0 # [TODO] implement cost tracking
response_time = (end_time - start_time).total_seconds()
if "choices" in response_obj:
litellm_data_obj = {
"response_time": response_time,
"model": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"response": response_obj["choices"][0]["message"]["content"],
"end_user": end_user,
"litellm_call_id": litellm_call_id,
"status": "success"
}
print_verbose(
f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
)
response = requests.post(url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj))
elif "error" in response_obj:
if "Unable to map your input to a model." in response_obj["error"]:
total_cost = 0
litellm_data_obj = {
"response_time": response_time,
"model": response_obj["model"],
"total_cost": total_cost,
"messages": messages,
"error": response_obj["error"],
"end_user": end_user,
"litellm_call_id": litellm_call_id,
"status": "failure"
}
print_verbose(
f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
)
response = requests.post(url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj))
print_verbose(f"LiteDebugger: api response - {response.text}")
except:
print_verbose(f"LiteDebugger: Logging Error - {traceback.format_exc()}")
pass
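Editor's note: a hedged illustration of the "initiated" record that LiteDebugger.input_log_event above serializes and posts; every value here is hypothetical, and the shared litellm_call_id is what lets the later success/failure record be matched to it.

import json, uuid

litellm_data_obj = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello, how are you?"}],
    "end_user": "example-user",            # hypothetical end-user identifier
    "status": "initiated",
    "litellm_call_id": str(uuid.uuid4()),  # reused by the follow-up log_event call
}
print(json.dumps(litellm_data_obj, indent=2))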

View file

@@ -3,31 +3,94 @@
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
import datetime, subprocess, sys
model_cost = {
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
"gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
"gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
"gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
"gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
"gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
"claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
"claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
"text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
"command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
"gpt-3.5-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-35-turbo": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
}, # azure model name
"gpt-3.5-turbo-0613": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4000,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-35-turbo-16k": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
}, # azure model name
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
},
"gpt-4": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-0613": {
"max_tokens": 8000,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.00006,
},
"gpt-4-32k": {
"max_tokens": 8000,
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
},
"claude-instant-1": {
"max_tokens": 100000,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551,
},
"claude-2": {
"max_tokens": 100000,
"input_cost_per_token": 0.00001102,
"output_cost_per_token": 0.00003268,
},
"text-bison-001": {
"max_tokens": 8192,
"input_cost_per_token": 0.000004,
"output_cost_per_token": 0.000004,
},
"chat-bison-001": {
"max_tokens": 4096,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002,
},
"command-nightly": {
"max_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000015,
},
}
class Supabase:
# Class variables or attributes
supabase_table_name = "request_logs"
def __init__(self):
# Instance variables
self.supabase_url = os.getenv("SUPABASE_URL")
@@ -35,9 +98,11 @@ class Supabase:
try:
import supabase
except ImportError:
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'supabase'])
subprocess.check_call([sys.executable, "-m", "pip", "install", "supabase"])
import supabase
self.supabase_client = supabase.create_client(self.supabase_url, self.supabase_key)
self.supabase_client = supabase.create_client(
self.supabase_url, self.supabase_key
)
def price_calculator(self, model, response_obj, start_time, end_time):
# try and find if the model is in the model_cost map
@@ -45,17 +110,23 @@ class Supabase:
prompt_tokens_cost_usd_dollar = 0
completion_tokens_cost_usd_dollar = 0
if model in model_cost:
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
elif "replicate" in model:
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
elif "replicate" in model:
# replicate models are charged based on time
# llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
model_run_time = end_time - start_time # assuming time in seconds
model_run_time = end_time - start_time # assuming time in seconds
cost_usd_dollar = model_run_time * 0.0032
prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
else:
# calculate average input cost
# calculate average input cost
input_cost_sum = 0
output_cost_sum = 0
for model in model_cost:
@@ -63,41 +134,104 @@ class Supabase:
output_cost_sum += model_cost[model]["output_cost_per_token"]
avg_input_cost = input_cost_sum / len(model_cost.keys())
avg_output_cost = output_cost_sum / len(model_cost.keys())
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
prompt_tokens_cost_usd_dollar = (
model_cost[model]["input_cost_per_token"]
* response_obj["usage"]["prompt_tokens"]
)
completion_tokens_cost_usd_dollar = (
model_cost[model]["output_cost_per_token"]
* response_obj["usage"]["completion_tokens"]
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
def log_event(self, model, messages, end_user, response_obj, start_time, end_time, print_verbose):
def input_log_event(self, model, messages, end_user, litellm_call_id, print_verbose):
try:
print_verbose(f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}")
print_verbose(
f"Supabase Logging - Enters input logging function for model {model}"
)
supabase_data_obj = {
"model": model,
"messages": messages,
"end_user": end_user,
"status": "initiated",
"litellm_call_id": litellm_call_id
}
data, count = (
self.supabase_client.table(self.supabase_table_name)
.insert(supabase_data_obj)
.execute()
)
print(f"data: {data}")
except:
print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
pass
prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
def log_event(
self,
model,
messages,
end_user,
response_obj,
start_time,
end_time,
litellm_call_id,
print_verbose,
):
try:
print_verbose(
f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}"
)
response_time = (end_time-start_time).total_seconds()
(
prompt_tokens_cost_usd_dollar,
completion_tokens_cost_usd_dollar,
) = self.price_calculator(model, response_obj, start_time, end_time)
total_cost = (
prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
)
response_time = (end_time - start_time).total_seconds()
if "choices" in response_obj:
supabase_data_obj = {
"response_time": response_time,
"model": response_obj["model"],
"total_cost": total_cost,
"total_cost": total_cost,
"messages": messages,
"response": response_obj['choices'][0]['message']['content'],
"end_user": end_user
"response": response_obj["choices"][0]["message"]["content"],
"end_user": end_user,
"litellm_call_id": litellm_call_id,
"status": "success"
}
print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
print_verbose(
f"Supabase Logging - final data object: {supabase_data_obj}"
)
data, count = (
self.supabase_client.table(self.supabase_table_name)
.upsert(supabase_data_obj)
.execute()
)
elif "error" in response_obj:
if "Unable to map your input to a model." in response_obj["error"]:
total_cost = 0
supabase_data_obj = {
"response_time": response_time,
"model": response_obj["model"],
"total_cost": total_cost,
"total_cost": total_cost,
"messages": messages,
"error": response_obj['error'],
"end_user": end_user
"error": response_obj["error"],
"end_user": end_user,
"litellm_call_id": litellm_call_id,
"status": "failure"
}
print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
print_verbose(
f"Supabase Logging - final data object: {supabase_data_obj}"
)
data, count = (
self.supabase_client.table(self.supabase_table_name)
.upsert(supabase_data_obj)
.execute()
)
except:
# traceback.print_exc()
print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")

View file

@@ -1 +1 @@
from . import *
from . import *

View file

@@ -1,59 +1,78 @@
import os, json
from enum import Enum
import requests
from litellm import logging
import time
import time
from typing import Callable
from litellm.utils import ModelResponse
class AnthropicConstants(Enum):
HUMAN_PROMPT = "\n\nHuman:"
AI_PROMPT = "\n\nAssistant:"
class AnthropicError(Exception):
def __init__(self, status_code, message):
self.status_code = status_code
self.message = message
super().__init__(self.message) # Call the base class constructor with the parameters it needs
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class AnthropicLLM:
def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
class AnthropicLLM:
def __init__(self, encoding, default_max_tokens_to_sample, logging_obj, api_key=None):
self.encoding = encoding
self.default_max_tokens_to_sample = default_max_tokens_to_sample
self.completion_url = "https://api.anthropic.com/v1/complete"
self.api_key = api_key
self.logging_obj = logging_obj
self.validate_environment(api_key=api_key)
def validate_environment(self, api_key): # set up the environment required to run the model
# set the api key
try:
self.api_key = os.getenv("ANTHROPIC_API_KEY") if "ANTHROPIC_API_KEY" in os.environ else api_key
if self.api_key == None:
raise Exception
self.headers = {
"accept": "application/json",
"anthropic-version": "2023-06-01",
"content-type": "application/json",
"x-api-key": self.api_key
}
except:
raise ValueError("Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params")
pass
def validate_environment(
self, api_key
): # set up the environment required to run the model
# set the api key
if self.api_key == None:
raise ValueError(
"Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
)
self.api_key = api_key
self.headers = {
"accept": "application/json",
"anthropic-version": "2023-06-01",
"content-type": "application/json",
"x-api-key": self.api_key,
}
def completion(self, model: str, messages: list, model_response: dict, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls
def completion(
self,
model: str,
messages: list,
model_response: ModelResponse,
print_verbose: Callable,
optional_params=None,
litellm_params=None,
logger_fn=None,
): # logic for parsing in - calling - parsing out model completion calls
model = model
prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
for message in messages:
if "role" in message:
if message["role"] == "user":
prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
prompt += (
f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
)
else:
prompt += f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
prompt += (
f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
)
else:
prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
prompt += f"{AnthropicConstants.AI_PROMPT.value}"
if "max_tokens" in optional_params and optional_params["max_tokens"] != float('inf'):
if "max_tokens" in optional_params and optional_params["max_tokens"] != float(
"inf"
):
max_tokens = optional_params["max_tokens"]
else:
max_tokens = self.default_max_tokens_to_sample
@@ -61,39 +80,51 @@ class AnthropicLLM:
"model": model,
"prompt": prompt,
"max_tokens_to_sample": max_tokens,
**optional_params
**optional_params,
}
## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params}, logger_fn=logger_fn)
self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
## COMPLETION CALL
response = requests.post(self.completion_url, headers=self.headers, data=json.dumps(data))
response = requests.post(
self.completion_url, headers=self.headers, data=json.dumps(data)
)
if "stream" in optional_params and optional_params["stream"] == True:
return response.iter_lines()
else:
## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params, "original_response": response.text}, logger_fn=logger_fn)
self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
print_verbose(f"raw model_response: {response.text}")
## RESPONSE OBJECT
completion_response = response.json()
if "error" in completion_response:
raise AnthropicError(message=completion_response["error"], status_code=response.status_code)
raise AnthropicError(
message=completion_response["error"],
status_code=response.status_code,
)
else:
model_response["choices"][0]["message"]["content"] = completion_response["completion"]
model_response["choices"][0]["message"][
"content"
] = completion_response["completion"]
## CALCULATING USAGE
prompt_tokens = len(self.encoding.encode(prompt)) ##[TODO] use the anthropic tokenizer here
completion_tokens = len(self.encoding.encode(model_response["choices"][0]["message"]["content"])) ##[TODO] use the anthropic tokenizer here
prompt_tokens = len(
self.encoding.encode(prompt)
) ##[TODO] use the anthropic tokenizer here
completion_tokens = len(
self.encoding.encode(model_response["choices"][0]["message"]["content"])
) ##[TODO] use the anthropic tokenizer here
model_response["created"] = time.time()
model_response["model"] = model
model_response["usage"] = {
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": prompt_tokens + completion_tokens
}
"total_tokens": prompt_tokens + completion_tokens,
}
return model_response
def embedding(): # logic for parsing in - calling - parsing out model embedding calls
pass
def embedding(
self,
): # logic for parsing in - calling - parsing out model embedding calls
pass
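Editor's note: a small standalone sketch of the Human/Assistant prompt string that AnthropicLLM.completion builds above, with the constants inlined and a hypothetical conversation.

HUMAN_PROMPT = "\n\nHuman:"      # mirrors AnthropicConstants.HUMAN_PROMPT
AI_PROMPT = "\n\nAssistant:"     # mirrors AnthropicConstants.AI_PROMPT

messages = [
    {"role": "user", "content": "Hello, how are you?"},
    {"role": "assistant", "content": "Doing well, thanks!"},
    {"role": "user", "content": "Great, tell me a joke."},
]

prompt = HUMAN_PROMPT
for message in messages:
    if message.get("role") == "user":
        prompt += f"{HUMAN_PROMPT}{message['content']}"
    else:
        prompt += f"{AI_PROMPT}{message['content']}"
prompt += AI_PROMPT  # the completion is sampled after the final Assistant marker
print(repr(prompt))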

View file

@@ -1,11 +1,16 @@
## This is a template base class to be used for adding new LLM providers via API calls
class BaseLLM():
def validate_environment(): # set up the environment required to run the model
pass
def completion(): # logic for parsing in - calling - parsing out model completion calls
class BaseLLM:
def validate_environment(self): # set up the environment required to run the model
pass
def embedding(): # logic for parsing in - calling - parsing out model embedding calls
pass
def completion(
self,
): # logic for parsing in - calling - parsing out model completion calls
pass
def embedding(
self,
): # logic for parsing in - calling - parsing out model embedding calls
pass

View file

@@ -2,39 +2,60 @@
import os, json
from enum import Enum
import requests
from litellm import logging
import time
import time
from typing import Callable
from litellm.utils import ModelResponse
from typing import Optional
class HuggingfaceError(Exception):
def __init__(self, status_code, message):
self.status_code = status_code
self.message = message
super().__init__(self.message) # Call the base class constructor with the parameters it needs
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class HuggingfaceRestAPILLM():
def __init__(self, encoding, api_key=None) -> None:
class HuggingfaceRestAPILLM:
def __init__(self, encoding, logging_obj, api_key=None) -> None:
self.encoding = encoding
self.logging_obj = logging_obj
self.validate_environment(api_key=api_key)
def validate_environment(self, api_key): # set up the environment required to run the model
def validate_environment(
self, api_key
): # set up the environment required to run the model
self.headers = {
"content-type": "application/json",
}
# get the api key if it exists in the environment or is passed in, but don't require it
self.api_key = os.getenv("HF_TOKEN") if "HF_TOKEN" in os.environ else api_key
self.api_key = api_key
if self.api_key != None:
self.headers["Authorization"] = f"Bearer {self.api_key}"
self.headers["Authorization"] = f"Bearer {self.api_key}"
def completion(self, model: str, messages: list, custom_api_base: str, model_response: dict, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls
def completion(
self,
model: str,
messages: list,
custom_api_base: str,
model_response: ModelResponse,
print_verbose: Callable,
optional_params=None,
litellm_params=None,
logger_fn=None,
): # logic for parsing in - calling - parsing out model completion calls
completion_url: str = ""
if custom_api_base:
completion_url = custom_api_base
elif "HF_API_BASE" in os.environ:
completion_url = os.getenv("HF_API_BASE")
completion_url = os.getenv("HF_API_BASE", "")
else:
completion_url = f"https://api-inference.huggingface.co/models/{model}"
prompt = ""
if "meta-llama" in model and "chat" in model: # use the required special tokens for meta-llama - https://huggingface.co/blog/llama2#how-to-prompt-llama-2
if (
"meta-llama" in model and "chat" in model
): # use the required special tokens for meta-llama - https://huggingface.co/blog/llama2#how-to-prompt-llama-2
prompt = "<s>"
for message in messages:
if message["role"] == "system":
@@ -46,49 +67,60 @@ class HuggingfaceRestAPILLM():
else:
for message in messages:
prompt += f"{message['content']}"
### MAP INPUT PARAMS
# max tokens
### MAP INPUT PARAMS
# max tokens
if "max_tokens" in optional_params:
value = optional_params.pop("max_tokens")
optional_params["max_new_tokens"] = value
data = {
"inputs": prompt,
# "parameters": optional_params
"parameters": optional_params
}
## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params}, logger_fn=logger_fn)
self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
## COMPLETION CALL
response = requests.post(completion_url, headers=self.headers, data=json.dumps(data))
response = requests.post(
completion_url, headers=self.headers, data=json.dumps(data)
)
if "stream" in optional_params and optional_params["stream"] == True:
return response.iter_lines()
else:
## LOGGING
logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params, "original_response": response.text}, logger_fn=logger_fn)
print_verbose(f"raw model_response: {response.text}")
self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
## RESPONSE OBJECT
completion_response = response.json()
print_verbose(f"response: {completion_response}")
if isinstance(completion_response, dict) and "error" in completion_response:
print_verbose(f"completion error: {completion_response['error']}")
print_verbose(f"response.status_code: {response.status_code}")
raise HuggingfaceError(message=completion_response["error"], status_code=response.status_code)
raise HuggingfaceError(
message=completion_response["error"],
status_code=response.status_code,
)
else:
model_response["choices"][0]["message"]["content"] = completion_response[0]["generated_text"]
model_response["choices"][0]["message"][
"content"
] = completion_response[0]["generated_text"]
## CALCULATING USAGE
prompt_tokens = len(self.encoding.encode(prompt)) ##[TODO] use the llama2 tokenizer here
completion_tokens = len(self.encoding.encode(model_response["choices"][0]["message"]["content"])) ##[TODO] use the llama2 tokenizer here
prompt_tokens = len(
self.encoding.encode(prompt)
) ##[TODO] use the llama2 tokenizer here
completion_tokens = len(
self.encoding.encode(model_response["choices"][0]["message"]["content"])
) ##[TODO] use the llama2 tokenizer here
model_response["created"] = time.time()
model_response["model"] = model
model_response["usage"] = {
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": prompt_tokens + completion_tokens
}
"total_tokens": prompt_tokens + completion_tokens,
}
return model_response
pass
def embedding(): # logic for parsing in - calling - parsing out model embedding calls
pass
def embedding(
self,
): # logic for parsing in - calling - parsing out model embedding calls
pass
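Editor's note: a hedged sketch of the parameter mapping done above before calling the Hugging Face Inference API, where the OpenAI-style max_tokens argument is renamed to the max_new_tokens field; the prompt and values are illustrative.

optional_params = {"max_tokens": 256, "temperature": 0.7}  # hypothetical caller params

if "max_tokens" in optional_params:
    optional_params["max_new_tokens"] = optional_params.pop("max_tokens")

data = {
    "inputs": "Hello, how are you?",   # hypothetical prompt built from the messages
    "parameters": optional_params,
}
print(data)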

File diff suppressed because it is too large

137
litellm/testing.py Normal file
View file

@@ -0,0 +1,137 @@
import litellm
import time
from concurrent.futures import ThreadPoolExecutor
import traceback
def testing_batch_completion(*args, **kwargs):
try:
batch_models = (
args[0] if len(args) > 0 else kwargs.pop("models")
) ## expected input format- ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider"="baseten"}...]
batch_messages = args[1] if len(args) > 1 else kwargs.pop("messages")
results = []
completions = []
exceptions = []
times = []
with ThreadPoolExecutor() as executor:
for model in batch_models:
kwargs_modified = dict(kwargs)
args_modified = list(args)
if len(args) > 0:
args_modified[0] = model["model"]
else:
kwargs_modified["model"] = (
model["model"]
if isinstance(model, dict) and "model" in model
else model
) # if model is a dictionary get it's value else assume it's a string
kwargs_modified["custom_llm_provider"] = (
model["custom_llm_provider"]
if isinstance(model, dict) and "custom_llm_provider" in model
else None
)
kwargs_modified["custom_api_base"] = (
model["custom_api_base"]
if isinstance(model, dict) and "custom_api_base" in model
else None
)
for message_list in batch_messages:
if len(args) > 1:
args_modified[1] = message_list
future = executor.submit(
litellm.completion, *args_modified, **kwargs_modified
)
else:
kwargs_modified["messages"] = message_list
future = executor.submit(
litellm.completion, *args_modified, **kwargs_modified
)
completions.append((future, message_list))
# Retrieve the results and calculate elapsed time for each completion call
for completion in completions:
future, message_list = completion
start_time = time.time()
try:
result = future.result()
end_time = time.time()
elapsed_time = end_time - start_time
result_dict = {
"status": "succeeded",
"response": future.result(),
"prompt": message_list,
"response_time": elapsed_time,
}
results.append(result_dict)
except Exception as e:
end_time = time.time()
elapsed_time = end_time - start_time
result_dict = {
"status": "failed",
"response": e,
"response_time": elapsed_time,
}
results.append(result_dict)
return results
except:
traceback.print_exc()
def duration_test_model(original_function):
def wrapper_function(*args, **kwargs):
# Code to be executed before the original function
duration = kwargs.pop("duration", None)
interval = kwargs.pop("interval", None)
results = []
if duration and interval:
start_time = time.time()
end_time = start_time + duration # default to 1hr duration
while time.time() < end_time:
result = original_function(*args, **kwargs)
results.append(result)
time.sleep(interval)
else:
result = original_function(*args, **kwargs)
results = result
return results
# Return the wrapper function
return wrapper_function
@duration_test_model
def load_test_model(models: list, prompt: str = "", num_calls: int = 0):
test_calls = 100
if num_calls:
test_calls = num_calls
input_prompt = prompt if prompt else "Hey, how's it going?"
messages = (
[{"role": "user", "content": prompt}]
if prompt
else [{"role": "user", "content": input_prompt}]
)
full_message_list = [
messages for _ in range(test_calls)
] # call it as many times as set by user to load test models
start_time = time.time()
try:
results = testing_batch_completion(models=models, messages=full_message_list)
end_time = time.time()
response_time = end_time - start_time
return {
"total_response_time": response_time,
"calls_made": test_calls,
"prompt": input_prompt,
"results": results,
}
except Exception as e:
traceback.print_exc()
end_time = time.time()
response_time = end_time - start_time
return {
"total_response_time": response_time,
"calls_made": test_calls,
"prompt": input_prompt,
"exception": e,
}
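Editor's note: a hedged usage sketch for the new litellm/testing.py helpers above, assuming the module is importable as litellm.testing and that API keys for the listed providers are set. The duration/interval kwargs are consumed by the duration_test_model decorator, so one call can either run once or loop for a fixed window.

from litellm.testing import load_test_model

# One-shot load test: 5 calls each against two models.
results = load_test_model(
    models=["gpt-3.5-turbo", "command-nightly"],
    prompt="Hey, how's it going?",
    num_calls=5,
)

# Repeated load test: rerun the same batch every 60 seconds for 10 minutes.
timed_results = load_test_model(
    models=["gpt-3.5-turbo"],
    prompt="Hey, how's it going?",
    num_calls=5,
    duration=600,
    interval=60,
)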

View file

@@ -3,39 +3,51 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
litellm.set_verbose = False
def logger_fn(model_call_object: dict):
print(f"model call details: {model_call_object}")
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
## Test 1: Setting key dynamically
temp_key = os.environ.get("ANTHROPIC_API_KEY")
temp_key = os.environ.get("ANTHROPIC_API_KEY", "")
os.environ["ANTHROPIC_API_KEY"] = "bad-key"
# test on openai completion call
# test on openai completion call
try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn, api_key=temp_key)
response = completion(
model="claude-instant-1",
messages=messages,
logger_fn=logger_fn,
api_key=temp_key,
)
print(f"response: {response}")
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
os.environ["ANTHROPIC_API_KEY"] = temp_key
## Test 2: Setting key via __init__ params
litellm.anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
litellm.anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
os.environ.pop("ANTHROPIC_API_KEY")
# test on openai completion call
# test on openai completion call
try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
print(f"response: {response}")
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
os.environ["ANTHROPIC_API_KEY"] = temp_key

View file

@@ -5,17 +5,22 @@ import sys, os
import pytest
import traceback
import asyncio
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from litellm import acompletion
async def test_get_response():
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
try:
response = await acompletion(model="gpt-3.5-turbo", messages=messages)
except Exception as e:
pytest.fail(f"error occurred: {e}")
return response
response = asyncio.run(test_get_response())
print(response)
print(response)

View file

@@ -1,16 +1,17 @@
#### What this tests ####
# This tests chaos monkeys - if random parts of the system are broken / things aren't sent correctly - what happens.
# Expect to add more edge cases to this over time.
# Expect to add more edge cases to this over time.
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
# Get the current directory of the script
current_dir = os.path.dirname(os.path.abspath(__file__))
# Get the parent directory by joining the current directory with '..'
parent_dir = os.path.join(current_dir, '../..')
parent_dir = os.path.join(current_dir, "../..")
# Add the parent directory to the system path
sys.path.append(parent_dir)
@@ -26,7 +27,7 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
model_val = None
@@ -35,18 +36,18 @@ def test_completion_with_empty_model():
try:
response = completion(model=model_val, messages=messages)
except Exception as e:
print(f"error occurred: {e}")
print(f"error occurred: {e}")
pass
#bad key
# bad key
temp_key = os.environ.get("OPENAI_API_KEY")
os.environ["OPENAI_API_KEY"] = "bad-key"
# test on openai completion call
# test on openai completion call
try:
response = completion(model="gpt-3.5-turbo", messages=messages)
print(f"response: {response}")
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
os.environ["OPENAI_API_KEY"] = temp_key
os.environ["OPENAI_API_KEY"] = str(temp_key) # this passes linting#5

View file

@@ -3,7 +3,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import batch_completion
@ -14,4 +17,4 @@ model = "gpt-3.5-turbo"
result = batch_completion(model=model, messages=messages)
print(result)
print(len(result))
print(len(result))

View file

@@ -19,7 +19,7 @@
# #openai call
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# #bad request call
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

View file

@@ -0,0 +1,52 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion
messages = [{"role": "user", "content": "who is ishaan Github? "}]
# test if response cached
def test_caching():
try:
litellm.caching = True
response1 = completion(model="gpt-3.5-turbo", messages=messages)
response2 = completion(model="gpt-3.5-turbo", messages=messages)
print(f"response1: {response1}")
print(f"response2: {response2}")
litellm.caching = False
if response2 != response1:
print(f"response1: {response1}")
print(f"response2: {response2}")
pytest.fail(f"Error occurred: {e}")
except Exception as e:
litellm.caching = False
print(f"error occurred: {traceback.format_exc()}")
pytest.fail(f"Error occurred: {e}")
def test_caching_with_models():
litellm.caching_with_models = True
response2 = completion(model="gpt-3.5-turbo", messages=messages)
response3 = completion(model="command-nightly", messages=messages)
print(f"response2: {response2}")
print(f"response3: {response3}")
litellm.caching_with_models = False
if response3 == response2:
# if models are different, it should not return cached response
print(f"response2: {response2}")
print(f"response3: {response3}")
pytest.fail(f"Error occurred: {e}")

View file

@@ -5,7 +5,9 @@ import sys, os
import traceback
import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@@ -14,53 +16,71 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
litellm.set_verbose = True
def logger_fn(model_call_object: dict):
# print(f"model call details: {model_call_object}")
pass
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
def test_completion_openai():
try:
print("running query")
response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn)
response = completion(
model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn
)
print(f"response: {response}")
# Add any assertions here to check the response
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
test_completion_openai()
def test_completion_claude():
try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_claude()
def test_completion_non_openai():
try:
response = completion(model="command-nightly", messages=messages, logger_fn=logger_fn)
response = completion(
model="command-nightly", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_non_openai()
def test_embedding_openai():
try:
response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
response = embedding(
model="text-embedding-ada-002", input=[user_message], logger_fn=logger_fn
)
# Add any assertions here to check the response
print(f"response: {str(response)[:50]}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_bad_azure_embedding():
try:
response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
response = embedding(
model="chatgpt-test", input=[user_message], logger_fn=logger_fn
)
# Add any assertions here to check the response
print(f"response: {str(response)[:50]}")
except Exception as e:
pass
# def test_good_azure_embedding():
# try:
# response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
@@ -68,4 +88,3 @@ def test_bad_azure_embedding():
# print(f"response: {str(response)[:50]}")
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")

View file

@@ -1,53 +1,79 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion
# from infisical import InfisicalClient
# litellm.set_verbose = True
# litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])
user_message = "Hello, whats the weather in San Francisco??"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
def logger_fn(user_model_dict):
print(f"user_model_dict: {user_model_dict}")
def test_completion_claude():
def test_completion_custom_provider_model_name():
try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
response = completion(
model="together_ai/togethercomputer/llama-2-70b-chat", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_custom_provider_model_name()
def test_completion_claude():
try:
response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_claude_stream():
try:
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "how does a court case get to the Supreme Court?"}
{
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
]
response = completion(model="claude-2", messages=messages, stream=True)
# Add any assertions here to check the response
for chunk in response:
print(chunk['choices'][0]['delta']) # same as openai format
print(chunk["choices"][0]["delta"]) # same as openai format
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_hf_api():
try:
user_message = "write some code to find the sum of two numbers"
messages = [{ "content": user_message,"role": "user"}]
response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# def test_completion_hf_api():
# try:
# user_message = "write some code to find the sum of two numbers"
# messages = [{ "content": user_message,"role": "user"}]
# response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")
# # Add any assertions here to check the response
# print(response)
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# def test_completion_hf_deployed_api():
# try:
@@ -62,65 +88,140 @@ def test_completion_hf_api():
def test_completion_cohere():
try:
response = completion(model="command-nightly", messages=messages, max_tokens=500)
response = completion(
model="command-nightly", messages=messages, max_tokens=100, logit_bias={40: 10}
)
# Add any assertions here to check the response
print(response)
response_str = response["choices"][0]["message"]["content"]
print(f"str response{response_str}")
response_str_2 = response.choices[0].message.content
if type(response_str) != str:
pytest.fail(f"Error occurred: {e}")
if type(response_str_2) != str:
pytest.fail(f"Error occurred: {e}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_cohere_stream():
try:
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "how does a court case get to the Supreme Court?"}
{
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
]
response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50)
response = completion(
model="command-nightly", messages=messages, stream=True, max_tokens=50
)
# Add any assertions here to check the response
for chunk in response:
print(chunk['choices'][0]['delta']) # same as openai format
print(chunk["choices"][0]["delta"]) # same as openai format
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai():
try:
response = completion(model="gpt-3.5-turbo", messages=messages)
response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content
assert response_str == response_str_2
assert type(response_str) == str
assert len(response_str) > 1
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_text_openai():
try:
response = completion(model="text-davinci-003", messages=messages)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_optional_params():
try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, user="ishaan_dev@berri.ai")
response = completion(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.5,
top_p=0.1,
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openrouter():
try:
response = completion(model="google/palm-2-chat-bison", messages=messages, temperature=0.5, top_p=0.1, user="ishaan_dev@berri.ai")
response = completion(
model="google/palm-2-chat-bison",
messages=messages,
temperature=0.5,
top_p=0.1,
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_more_optional_params():
try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, n=2, max_tokens=150, presence_penalty=0.5, frequency_penalty=-0.5, logit_bias={123: 5}, user="ishaan_dev@berri.ai")
response = completion(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.5,
top_p=0.1,
n=2,
max_tokens=150,
presence_penalty=0.5,
frequency_penalty=-0.5,
logit_bias={123: 5},
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response
print(response)
response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content
print(response["choices"][0]["message"]["content"])
print(response.choices[0].message.content)
if type(response_str) != str:
pytest.fail(f"Error occurred: {e}")
if type(response_str_2) != str:
pytest.fail(f"Error occurred: {e}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_stream():
try:
response = completion(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.5,
top_p=0.1,
n=2,
max_tokens=150,
presence_penalty=0.5,
stream=True,
frequency_penalty=-0.5,
logit_bias={27000: 5},
user="ishaan_dev@berri.ai",
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_stream():
try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, n=2, max_tokens=150, presence_penalty=0.5, stream=True, frequency_penalty=-0.5, logit_bias={27000: 5}, user="ishaan_dev@berri.ai")
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_functions():
function1 = [
@@ -132,33 +233,39 @@ def test_completion_openai_with_functions():
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {
"type": "string",
"enum": ["celsius", "fahrenheit"]
}
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"]
}
"required": ["location"],
},
}
]
try:
response = completion(model="gpt-3.5-turbo", messages=messages, functions=function1)
response = completion(
model="gpt-3.5-turbo", messages=messages, functions=function1
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_azure():
try:
response = completion(model="gpt-3.5-turbo", deployment_id="chatgpt-test", messages=messages, custom_llm_provider="azure")
response = completion(
model="gpt-3.5-turbo",
deployment_id="chatgpt-test",
messages=messages,
custom_llm_provider="azure",
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
def test_completion_replicate_llama_stream():
model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
try:
@@ -170,59 +277,69 @@ def test_completion_replicate_llama_stream():
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_replicate_stability_stream():
model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
try:
response = completion(model=model_name, messages=messages, stream=True, custom_llm_provider="replicate")
response = completion(
model=model_name,
messages=messages,
stream=True,
custom_llm_provider="replicate",
)
# Add any assertions here to check the response
for chunk in response:
print(chunk['choices'][0]['delta'])
print(chunk["choices"][0]["delta"])
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_replicate_stability():
model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
try:
response = completion(model=model_name, messages=messages, custom_llm_provider="replicate")
response = completion(
model=model_name, messages=messages, custom_llm_provider="replicate"
)
# Add any assertions here to check the response
for result in response:
print(result)
print(response)
response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content
print(response_str)
print(response_str_2)
if type(response_str) != str:
pytest.fail(f"Error occurred: {e}")
if type(response_str_2) != str:
pytest.fail(f"Error occurred: {e}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
######## Test TogetherAI ########
def test_completion_together_ai():
model_name = "togethercomputer/llama-2-70b-chat"
try:
response = completion(model=model_name, messages=messages, custom_llm_provider="together_ai")
response = completion(model=model_name, messages=messages)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_together_ai_stream():
model_name = "togethercomputer/llama-2-70b-chat"
try:
response = completion(model=model_name, messages=messages, custom_llm_provider="together_ai", stream=True)
# Add any assertions here to check the response
print(response)
for chunk in response:
print(chunk['choices'][0]['delta']) # same as openai format
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_petals():
model_name = "stabilityai/StableBeluga2"
try:
response = completion(model=model_name, messages=messages, custom_llm_provider="petals", force_timeout=120)
response = completion(
model=model_name,
messages=messages,
custom_llm_provider="petals",
force_timeout=120,
)
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# def test_baseten_falcon_7bcompletion():
# model_name = "qvv0xeq"
# try:
@@ -270,7 +387,6 @@ def test_petals():
# pytest.fail(f"Error occurred: {e}")
#### Test A121 ###################
# def test_completion_ai21():
# model_name = "j2-light"
@@ -281,7 +397,7 @@ def test_petals():
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test config file with completion #
# test config file with completion #
# def test_completion_openai_config():
# try:
# litellm.config_path = "../config.json"
@@ -294,4 +410,22 @@ def test_petals():
# pytest.fail(f"Error occurred: {e}")
# import asyncio
# def test_completion_together_ai_stream():
# user_message = "Write 1pg about YC & litellm"
# messages = [{ "content": user_message,"role": "user"}]
# try:
# response = completion(model="togethercomputer/llama-2-70b-chat", messages=messages, stream=True, max_tokens=800)
# print(response)
# asyncio.run(get_response(response))
# # print(string_response)
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# async def get_response(generator):
# async for elem in generator:
# print(elem)
# return
# test_completion_together_ai_stream()
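Editor's note: the streaming tests above all consume the generator the same way; here is that pattern on its own, assuming a valid key for the chosen provider is set. The model and prompt are taken from the tests and are interchangeable.

from litellm import completion

messages = [{"role": "user", "content": "how does a court case get to the Supreme Court?"}]
response = completion(model="command-nightly", messages=messages, stream=True, max_tokens=50)

for chunk in response:
    print(chunk["choices"][0]["delta"])  # each streamed chunk follows the OpenAI delta format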

View file

@@ -1,20 +1,33 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
import litellm
from litellm import completion
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import completion
def logging_fn(model_call_dict):
print(f"model call details: {model_call_dict}")
models = ["gorilla-7b-hf-v1", "gpt-4"]
custom_llm_provider = None
messages = [{"role": "user", "content": "Hey, how's it going?"}]
for model in models: # iterate through list
for model in models: # iterate through list
custom_api_base = None
if model == "gorilla-7b-hf-v1":
if model == "gorilla-7b-hf-v1":
custom_llm_provider = "custom_openai"
custom_api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider, custom_api_base=custom_api_base, logger_fn=logging_fn)
completion(
model=model,
messages=messages,
custom_llm_provider=custom_llm_provider,
custom_api_base=custom_api_base,
logger_fn=logging_fn,
)

View file

@@ -1,20 +1,24 @@
import sys, os
import traceback
import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
from infisical import InfisicalClient
# litellm.set_verbose = True
litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])
# # litellm.set_verbose = True
# litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])
def test_openai_embedding():
try:
response = embedding(model='text-embedding-ada-002', input=["good morning from litellm"])
response = embedding(
model="text-embedding-ada-002", input=["good morning from litellm"]
)
# Add any assertions here to check the response
print(f"response: {str(response)}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
pytest.fail(f"Error occurred: {e}")

View file

@ -1,10 +1,21 @@
# from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
import os
import os
import sys
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion, AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
from litellm import (
embedding,
completion,
AuthenticationError,
InvalidRequestError,
RateLimitError,
ServiceUnavailableError,
OpenAIError,
)
from concurrent.futures import ThreadPoolExecutor
import pytest
@ -23,8 +34,10 @@ litellm.failure_callback = ["sentry"]
# models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
test_model = "claude-instant-1"
models = ["claude-instant-1"]
def logging_fn(model_call_dict):
if "model" in model_call_dict:
if "model" in model_call_dict:
print(f"model_call_dict: {model_call_dict['model']}")
else:
print(f"model_call_dict: {model_call_dict}")
@ -38,13 +51,18 @@ def test_context_window(model):
try:
model = "chatgpt-test"
print(f"model: {model}")
response = completion(model=model, messages=messages, custom_llm_provider="azure", logger_fn=logging_fn)
response = completion(
model=model,
messages=messages,
custom_llm_provider="azure",
logger_fn=logging_fn,
)
print(f"response: {response}")
except InvalidRequestError:
print("InvalidRequestError")
except InvalidRequestError as e:
print(f"InvalidRequestError: {e.llm_provider}")
return
except OpenAIError:
print("OpenAIError")
except OpenAIError as e:
print(f"OpenAIError: {e.llm_provider}")
return
except Exception as e:
print("Uncaught Error in test_context_window")
@ -52,14 +70,17 @@ def test_context_window(model):
print(f"Uncaught Exception - {e}")
pytest.fail(f"Error occurred: {e}")
return
test_context_window(test_model)
# Test 2: InvalidAuth Errors
@pytest.mark.parametrize("model", models)
def invalid_auth(model): # set the model key to an invalid key, depending on the model
messages = [{ "content": "Hello, how are you?","role": "user"}]
def invalid_auth(model): # set the model key to an invalid key, depending on the model
messages = [{"content": "Hello, how are you?", "role": "user"}]
temporary_key = None
try:
try:
custom_llm_provider = None
if model == "gpt-3.5-turbo":
temporary_key = os.environ["OPENAI_API_KEY"]
@ -74,22 +95,29 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
elif model == "command-nightly":
temporary_key = os.environ["COHERE_API_KEY"]
os.environ["COHERE_API_KEY"] = "bad-key"
elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
temporary_key = os.environ["REPLICATE_API_KEY"]
elif (
model
== "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
):
temporary_key = os.environ["REPLICATE_API_KEY"]
os.environ["REPLICATE_API_KEY"] = "bad-key"
print(f"model: {model}")
response = completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider)
response = completion(
model=model, messages=messages, custom_llm_provider=custom_llm_provider
)
print(f"response: {response}")
except AuthenticationError as e:
print(f"AuthenticationError Caught Exception - {e}")
except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
print(f"AuthenticationError Caught Exception - {e.llm_provider}")
except (
OpenAIError
): # is at least an openai error -> in case of random model errors - e.g. overloaded server
print(f"OpenAIError Caught Exception - {e}")
except Exception as e:
print(type(e))
print(e.__class__.__name__)
print(f"Uncaught Exception - {e}")
pytest.fail(f"Error occurred: {e}")
if temporary_key != None: # reset the key
if temporary_key != None: # reset the key
if model == "gpt-3.5-turbo":
os.environ["OPENAI_API_KEY"] = temporary_key
elif model == "chatgpt-test":
@ -99,13 +127,18 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
os.environ["ANTHROPIC_API_KEY"] = temporary_key
elif model == "command-nightly":
os.environ["COHERE_API_KEY"] = temporary_key
elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
elif (
model
== "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
):
os.environ["REPLICATE_API_KEY"] = temporary_key
return
invalid_auth(test_model)
# # Test 3: Rate Limit Errors
# # Test 3: Rate Limit Errors
# def test_model(model):
# try:
# try:
# sample_text = "how does a court case get to the Supreme Court?" * 50000
# messages = [{ "content": sample_text,"role": "user"}]
# custom_llm_provider = None
@ -142,5 +175,3 @@ invalid_auth(test_model)
# accuracy_score = counts[True]/(counts[True] + counts[False])
# print(f"accuracy_score: {accuracy_score}")

View file

@ -5,7 +5,9 @@ import sys, os
import traceback
import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@ -14,11 +16,15 @@ litellm.success_callback = ["helicone"]
litellm.set_verbose = True
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# openai call
response = completion(
model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
)
#cohere call
response = completion(model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}])
# cohere call
response = completion(
model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}]
)

View file

@ -0,0 +1,26 @@
# #### What this tests ####
# # This tests if logging to the litedebugger integration actually works
# # pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
# import sys, os
# import traceback
# import pytest
# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
# import litellm
# from litellm import embedding, completion
# litellm.input_callback = ["lite_debugger"]
# litellm.success_callback = ["lite_debugger"]
# litellm.failure_callback = ["lite_debugger"]
# litellm.set_verbose = True
# user_message = "Hello, how are you?"
# messages = [{ "content": user_message,"role": "user"}]
# #openai call
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# #bad request call
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

View file

@ -1,9 +1,37 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
import litellm
from litellm import load_test_model
model="gpt-3.5-turbo"
result = load_test_model(model=model, num_calls=5)
print(result)
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import load_test_model, testing_batch_completion
# ## Load Test Model
# model="gpt-3.5-turbo"
# result = load_test_model(model=model, num_calls=5)
# print(result)
# print(len(result["results"]))
# ## Duration Test Model
# model="gpt-3.5-turbo"
# result = load_test_model(model=model, num_calls=5, duration=15, interval=15) # duration test the model for 2 minutes, sending 5 calls every 15s
# print(result)
## Quality Test across Model
models = [
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-4",
"claude-instant-1",
{
"model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781",
"custom_llm_provider": "replicate",
},
]
messages = [
[{"role": "user", "content": "What is your name?"}],
[{"role": "user", "content": "Hey, how's it going?"}],
]
result = testing_batch_completion(models=models, messages=messages)
print(result)

View file

@ -3,7 +3,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@ -11,49 +14,53 @@ litellm.set_verbose = False
score = 0
def logger_fn(model_call_object: dict):
print(f"model call details: {model_call_object}")
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
# test on openai completion call
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
# test on openai completion call
try:
response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn)
score +=1
score += 1
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
# test on non-openai completion call
# test on non-openai completion call
try:
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
response = completion(
model="claude-instant-1", messages=messages, logger_fn=logger_fn
)
print(f"claude response: {response}")
score +=1
score += 1
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
# # test on openai embedding call
# try:
# # test on openai embedding call
# try:
# response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
# score +=1
# score +=1
# except:
# traceback.print_exc()
# # test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model
# try:
# try:
# response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
# except:
# score +=1 # expect this to fail
# traceback.print_exc()
# # test on good azure openai embedding call
# try:
# # test on good azure openai embedding call
# try:
# response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
# score +=1
# score +=1
# except:
# traceback.print_exc()
# print(f"Score: {score}, Overall score: {score/5}")
# print(f"Score: {score}, Overall score: {score/5}")

View file

@ -3,7 +3,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@ -15,11 +18,11 @@ litellm.set_verbose = True
model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
for model in model_fallback_list:
try:
response = embedding(model="text-embedding-ada-002", input=[user_message])
response = completion(model=model, messages=messages)
except Exception as e:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")

View file

@ -0,0 +1,23 @@
# #### What this tests ####
# # This tests if the litellm model response type is returnable in a flask app
# import sys, os
# import traceback
# from flask import Flask, request, jsonify, abort, Response
# sys.path.insert(0, os.path.abspath('../../..')) # Adds the parent directory to the system path
# import litellm
# from litellm import completion
# litellm.set_verbose = False
# app = Flask(__name__)
# @app.route('/')
# def hello():
# data = request.json
# return completion(**data)
# if __name__ == '__main__':
# from waitress import serve
# serve(app, host='localhost', port=8080, threads=10)

View file

@ -0,0 +1,14 @@
# import requests, json
# BASE_URL = 'http://localhost:8080'
# def test_hello_route():
# data = {"model": "claude-instant-1", "messages": [{"role": "user", "content": "hey, how's it going?"}]}
# headers = {'Content-Type': 'application/json'}
# response = requests.get(BASE_URL, headers=headers, data=json.dumps(data))
# print(response.text)
# assert response.status_code == 200
# print("Hello route test passed!")
# if __name__ == '__main__':
# test_hello_route()

View file

@ -4,7 +4,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
@ -13,11 +16,11 @@ litellm.set_verbose = True
model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
for model in model_fallback_list:
try:
response = embedding(model="text-embedding-ada-002", input=[user_message])
response = completion(model=model, messages=messages)
except Exception as e:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")

View file

@ -53,7 +53,6 @@
# # # return this generator to the client for streaming requests
# # async def get_response():
# # global generator
# # async for elem in generator:

View file

@ -12,7 +12,6 @@
# import asyncio
# user_message = "respond in 20 words. who are you?"
# messages = [{ "content": user_message,"role": "user"}]
@ -45,8 +44,3 @@
# pytest.fail(f"Error occurred: {e}")
# test_completion_ollama_stream()

View file

@ -4,7 +4,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
from infisical import InfisicalClient
@ -15,15 +18,8 @@ infisical_token = os.environ["INFISICAL_TOKEN"]
litellm.secret_manager_client = InfisicalClient(token=infisical_token)
user_message = "Hello, whats the weather in San Francisco??"
messages = [{ "content": user_message,"role": "user"}]
messages = [{"content": user_message, "role": "user"}]
def test_completion_azure():
try:
response = completion(model="gpt-3.5-turbo", deployment_id="chatgpt-test", messages=messages, custom_llm_provider="azure")
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai():
try:
@ -31,12 +27,9 @@ def test_completion_openai():
# Add any assertions here to check the response
print(response)
except Exception as e:
litellm.secret_manager_client = None
pytest.fail(f"Error occurred: {e}")
litellm.secret_manager_client = None
def test_completion_openai_with_optional_params():
try:
response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0.5, top_p=0.1, user="ishaan_dev@berri.ai")
# Add any assertions here to check the response
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_openai()

View file

@ -3,7 +3,10 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import completion
@ -11,29 +14,40 @@ litellm.set_verbose = False
score = 0
def logger_fn(model_call_object: dict):
print(f"model call details: {model_call_object}")
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
# test on anthropic completion call
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
# test on anthropic completion call
try:
response = completion(model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn)
response = completion(
model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
)
for chunk in response:
print(chunk['choices'][0]['delta'])
score +=1
print(chunk["choices"][0]["delta"])
score += 1
except:
print(f"error occurred: {traceback.format_exc()}")
print(f"error occurred: {traceback.format_exc()}")
pass
# test on anthropic completion call
# test on anthropic completion call
try:
response = completion(model="meta-llama/Llama-2-7b-chat-hf", messages=messages, custom_llm_provider="huggingface", custom_api_base="https://s7c7gytn18vnu4tw.us-east-1.aws.endpoints.huggingface.cloud", stream=True, logger_fn=logger_fn)
response = completion(
model="meta-llama/Llama-2-7b-chat-hf",
messages=messages,
custom_llm_provider="huggingface",
custom_api_base="https://s7c7gytn18vnu4tw.us-east-1.aws.endpoints.huggingface.cloud",
stream=True,
logger_fn=logger_fn,
)
for chunk in response:
print(chunk['choices'][0]['delta'])
score +=1
print(chunk["choices"][0]["delta"])
score += 1
except:
print(f"error occurred: {traceback.format_exc()}")
pass
print(f"error occurred: {traceback.format_exc()}")
pass

View file

@ -1,5 +1,5 @@
# #### What this tests ####
# # This tests if logging to the helicone integration actually works
# # This tests if logging to the supabase integration actually works
# # pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
# import sys, os
# import traceback
@ -9,10 +9,11 @@
# import litellm
# from litellm import embedding, completion
# litellm.input_callback = ["supabase"]
# litellm.success_callback = ["supabase"]
# litellm.failure_callback = ["supabase"]
# litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
# # litellm.modify_integration("supabase",{"table_name": "test_table"})
# litellm.set_verbose = True
@ -21,7 +22,7 @@
# #openai call
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
# #bad request call
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])
# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

View file

@ -3,10 +3,14 @@
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import time
from litellm import timeout
@timeout(10)
def stop_after_10_s(force_timeout=60):
print("Stopping after 10 seconds")
@ -14,14 +18,14 @@ def stop_after_10_s(force_timeout=60):
return
start_time = time.time()
start_time = time.time()
try:
stop_after_10_s(force_timeout=1)
stop_after_10_s(force_timeout=1)
except Exception as e:
print(e)
pass
print(e)
pass
end_time = time.time()
print(f"total time: {end_time-start_time}")
print(f"total time: {end_time-start_time}")

View file

@ -49,4 +49,4 @@
# # chat = chat_model.start_chat()
# # response = chat.send_message("who are u? write a sentence", **parameters)
# # print(f"Response from Model: {response.text}")
# # print(f"Response from Model: {response.text}")

View file

@ -11,9 +11,7 @@ from threading import Thread
from openai.error import Timeout
def timeout(
timeout_duration: float = None, exception_to_raise = Timeout
):
def timeout(timeout_duration: float = 0.0, exception_to_raise=Timeout):
"""
Wraps a function to raise the specified exception if execution time
is greater than the specified timeout.
@ -44,7 +42,9 @@ def timeout(
result = future.result(timeout=local_timeout_duration)
except futures.TimeoutError:
thread.stop_loop()
raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).")
raise exception_to_raise(
f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s)."
)
thread.stop_loop()
return result
@ -59,7 +59,9 @@ def timeout(
)
return value
except asyncio.TimeoutError:
raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).")
raise exception_to_raise(
f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s)."
)
if iscoroutinefunction(func):
return async_wrapper
@ -80,4 +82,4 @@ class _LoopWrapper(Thread):
def stop_loop(self):
for task in asyncio.all_tasks(self.loop):
task.cancel()
self.loop.call_soon_threadsafe(self.loop.stop)
self.loop.call_soon_threadsafe(self.loop.stop)

File diff suppressed because it is too large Load diff

914
poetry.lock generated

File diff suppressed because it is too large Load diff

86
proxy-server/main.py Normal file
View file

@ -0,0 +1,86 @@
from flask import Flask, request, jsonify, abort, Response
from flask_cors import CORS
import traceback
import litellm
from litellm import completion
import openai
from utils import handle_error, get_cache, add_cache
import os, dotenv
import logging
import json
dotenv.load_dotenv()
# TODO: set your keys in .env or here:
# os.environ["OPENAI_API_KEY"] = "" # set your openai key here
# see supported models / keys here: https://litellm.readthedocs.io/en/latest/supported/
######### LOGGING ###################
# log your data to slack, supabase
litellm.success_callback=["slack", "supabase"] # set .env SLACK_API_TOKEN, SLACK_API_SECRET, SLACK_API_CHANNEL, SUPABASE
######### ERROR MONITORING ##########
# log errors to slack, sentry, supabase
litellm.failure_callback=["slack", "sentry", "supabase"] # .env SENTRY_API_URL
app = Flask(__name__)
CORS(app)
@app.route('/')
def index():
return 'received!', 200
def data_generator(response):
for chunk in response:
yield f"data: {json.dumps(chunk)}\n\n"
@app.route('/chat/completions', methods=["POST"])
def api_completion():
data = request.json
if data.get('stream') == "True":
data['stream'] = True # convert to boolean
try:
# pass in data to completion function, unpack data
response = completion(**data)
        if 'stream' in data and data['stream'] == True: # stream responses back via data_generator
return Response(data_generator(response), mimetype='text/event-stream')
except Exception as e:
# call handle_error function
print(f"got error{e}")
return handle_error(data)
return response, 200 # non streaming responses
@app.route('/get_models', methods=["POST"])
def get_models():
try:
return litellm.model_list
except Exception as e:
traceback.print_exc()
response = {"error": str(e)}
return response, 200
if __name__ == "__main__":
from waitress import serve
serve(app, host="0.0.0.0", port=5000, threads=500)
############### Advanced ##########################
############ Caching ###################################
# make a new endpoint with caching
# This Cache is built using ChromaDB
# it has two functions add_cache() and get_cache()
@app.route('/chat/completions_with_cache', methods=["POST"])
def api_completion_with_cache():
data = request.json
try:
cache_response = get_cache(data['messages'])
        if cache_response is not None:
return cache_response
# pass in data to completion function, unpack data
response = completion(**data)
# add to cache
except Exception as e:
# call handle_error function
return handle_error(data)
return response, 200
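
`main.py` above imports `handle_error`, `get_cache`, and `add_cache` from a `utils` module that is not included in this diff, and the comments describe the cache as ChromaDB-backed with exactly those two functions. As a rough illustration only — the collection name, the prompt serialization, and the `0.3` distance threshold below are assumptions, not the repo's code — the helpers might look something like this:

```python
# Hypothetical sketch of the ChromaDB-backed cache helpers imported by main.py.
# utils.py is not part of this diff; the collection name and the distance
# threshold are assumptions made for illustration.
import uuid
import chromadb

cache_client = chromadb.Client()
cache_collection = cache_client.create_collection("litellm_response_cache")


def add_cache(messages, response):
    # store the concatenated prompt as the document and keep the response alongside it
    prompt = " ".join(m["content"] for m in messages)
    cache_collection.add(
        documents=[prompt],
        metadatas=[{"response": str(response)}],
        ids=[str(uuid.uuid4())],
    )


def get_cache(messages):
    # return a cached response when a semantically similar prompt was seen before
    prompt = " ".join(m["content"] for m in messages)
    try:
        results = cache_collection.query(query_texts=[prompt], n_results=1)
        if results["documents"][0] and results["distances"][0][0] < 0.3:
            return results["metadatas"][0][0]["response"]
    except Exception:
        pass
    return None
```

The default chromadb client keeps everything in memory, so a real deployment would likely configure persistence and tune the similarity threshold.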

168
proxy-server/readme.md Normal file
View file

@ -0,0 +1,168 @@
# liteLLM Proxy Server: 50+ LLM Models, Error Handling, Caching
### Azure, Llama2, OpenAI, Claude, Hugging Face, Replicate Models
[![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/)
[![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/)
![Downloads](https://img.shields.io/pypi/dm/litellm)
[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm)
[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/DYqQAW?referralCode=t3ukrU)
![4BC6491E-86D0-4833-B061-9F54524B2579](https://github.com/BerriAI/litellm/assets/17561003/f5dd237b-db5e-42e1-b1ac-f05683b1d724)
## What does liteLLM proxy do
- Make `/chat/completions` requests for 50+ LLM models **Azure, OpenAI, Replicate, Anthropic, Hugging Face**
Example: for `model` use `claude-2`, `gpt-3.5`, `gpt-4`, `command-nightly`, `stabilityai/stablecode-completion-alpha-3b-4k`
```json
{
"model": "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1",
"messages": [
{
"content": "Hello, whats the weather in San Francisco??",
"role": "user"
}
]
}
```
- **Consistent Input/Output** Format
- Call all models using the OpenAI format - `completion(model, messages)`
- Text responses will always be available at `['choices'][0]['message']['content']`
- **Error Handling** Using Model Fallbacks (if `GPT-4` fails, try `llama2`)
- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `Helicone` (any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/)
**Example: Logs sent to Supabase**
<img width="1015" alt="Screenshot 2023-08-11 at 4 02 46 PM" src="https://github.com/ishaan-jaff/proxy-server/assets/29436595/237557b8-ba09-4917-982c-8f3e1b2c8d08">
- **Token Usage & Spend** - Track Input + Completion tokens used + Spend/model
- **Caching** - Implementation of Semantic Caching
- **Streaming & Async Support** - Return generators to stream text responses (a client-side sketch follows this list)
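
Because the proxy's `/chat/completions` route streams chunks as server-sent events via `data_generator` (see `proxy-server/main.py` earlier in this diff), a client can consume the stream along these lines — a sketch that assumes the proxy is running locally on port 5000:

```python
# Sketch: consuming the proxy's streaming /chat/completions route.
# Assumes the proxy from proxy-server/main.py is running on http://localhost:5000.
import json
import requests

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello, whats the weather in San Francisco??"}],
    "stream": True,
}

with requests.post("http://localhost:5000/chat/completions", json=payload, stream=True) as resp:
    for line in resp.iter_lines():
        if not line:
            continue
        decoded = line.decode("utf-8")
        if decoded.startswith("data: "):
            chunk = json.loads(decoded[len("data: "):])
            # chunks follow the OpenAI streaming format
            print(chunk["choices"][0]["delta"])
```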
## API Endpoints
### `/chat/completions` (POST)
This endpoint is used to generate chat completions for 50+ supported LLM API models. Use llama2, GPT-4, Claude2, etc.
#### Input
This API endpoint accepts a raw JSON body and expects the following inputs:
- `model` (string, required): ID of the model to use for chat completions. See all supported models [here](https://litellm.readthedocs.io/en/latest/supported/):
eg `gpt-3.5-turbo`, `gpt-4`, `claude-2`, `command-nightly`, `stabilityai/stablecode-completion-alpha-3b-4k`
- `messages` (array, required): A list of messages representing the conversation context. Each message should have a `role` (system, user, assistant, or function), `content` (message text), and `name` (for function role).
- Additional Optional parameters: `temperature`, `functions`, `function_call`, `top_p`, `n`, `stream`. See the full list of supported inputs here: https://litellm.readthedocs.io/en/latest/input/
#### Example JSON body
For claude-2
```json
{
"model": "claude-2",
"messages": [
{
"content": "Hello, whats the weather in San Francisco??",
"role": "user"
}
]
}
```
### Making an API request to the Proxy Server
```python
import requests
import json
# TODO: use your URL
url = "http://localhost:5000/chat/completions"
payload = json.dumps({
"model": "gpt-3.5-turbo",
"messages": [
{
"content": "Hello, whats the weather in San Francisco??",
"role": "user"
}
]
})
headers = {
'Content-Type': 'application/json'
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
```
### Output [Response Format]
All responses from the server are returned in the following format, for every LLM model. More info on the output format: https://litellm.readthedocs.io/en/latest/output/
```json
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "I'm sorry, but I don't have the capability to provide real-time weather information. However, you can easily check the weather in San Francisco by searching online or using a weather app on your phone.",
"role": "assistant"
}
}
],
"created": 1691790381,
"id": "chatcmpl-7mUFZlOEgdohHRDx2UpYPRTejirzb",
"model": "gpt-3.5-turbo-0613",
"object": "chat.completion",
"usage": {
"completion_tokens": 41,
"prompt_tokens": 16,
"total_tokens": 57
}
}
```
## Installation & Usage
### Running Locally
1. Clone liteLLM repository to your local machine:
```
git clone https://github.com/BerriAI/liteLLM-proxy
```
2. Install the required dependencies using pip
```
pip install -r requirements.txt
```
3. Set your LLM API keys
```
os.environ['OPENAI_API_KEY'] = "YOUR_API_KEY"
or
set OPENAI_API_KEY in your .env file
```
4. Run the server:
```
python main.py
```
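
Once the server is running, a quick smoke test against the `/` and `/get_models` routes defined in `proxy-server/main.py` could look like the following sketch (it assumes the default `waitress` host/port of `0.0.0.0:5000`):

```python
# Sketch: smoke test for the proxy routes defined in proxy-server/main.py.
# Assumes the server was started locally with `python main.py` (port 5000).
import requests

base_url = "http://localhost:5000"

# "/" should answer with the plain string "received!"
print(requests.get(f"{base_url}/").text)

# "/get_models" is registered for POST and returns litellm.model_list
print(requests.post(f"{base_url}/get_models").text)
```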
## Deploying
1. Quick Start: Deploy on Railway
[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/DYqQAW?referralCode=t3ukrU)
2. `GCP`, `AWS`, `Azure`
This project includes a `Dockerfile`, so you can build the image and deploy it on your own cloud provider
# Support / Talk with founders
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
## Roadmap
- [ ] Support hosted db (e.g. Supabase)
- [ ] Easily send data to places like posthog and sentry.
- [ ] Add a hot-cache for project spend logs - enables fast checks for user + project limits
- [ ] Implement user-based rate-limiting
- [ ] Spending controls per project - expose key creation endpoint
- [ ] Need to store a keys db -> mapping created keys to their alias (i.e. project name)
- [ ] Easily add new models as backups / as the entry-point (add this to the available model list)

View file

@ -0,0 +1,21 @@
# import openai
# import os
# os.environ["OPENAI_API_KEY"] = ""
# openai.api_key = os.environ["OPENAI_API_KEY"]
# openai.api_base ="http://localhost:5000"
# messages = [
# {
# "role": "user",
# "content": "write a 1 pg essay in liteLLM"
# }
# ]
# response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages, stream=True)
# print("got response", response)
# # response is a generator
# for chunk in response:
# print(chunk)

View file

@ -1,53 +1,15 @@
from flask import Flask, request, jsonify, abort
from flask_cors import CORS
import traceback
import litellm
from litellm import completion
import os, dotenv
import json
dotenv.load_dotenv()
######### LOGGING ###################
# log your data to slack, supabase
litellm.success_callback=["slack", "supabase"] # set .env SLACK_API_TOKEN, SLACK_API_SECRET, SLACK_API_CHANNEL, SUPABASE
######### ERROR MONITORING ##########
# log errors to slack, sentry, supabase
litellm.failure_callback=["slack", "sentry", "supabase"] # .env SENTRY_API_URL
app = Flask(__name__)
CORS(app)
@app.route('/')
def index():
return 'received!', 200
@app.route('/chat/completions', methods=["POST"])
def api_completion():
data = request.json
try:
# pass in data to completion function, unpack data
response = completion(**data)
except Exception as e:
# call handle_error function
return handle_error(data)
return response, 200
@app.route('/get_models', methods=["POST"])
def get_models():
try:
return litellm.model_list
except Exception as e:
traceback.print_exc()
response = {"error": str(e)}
return response, 200
if __name__ == "__main__":
from waitress import serve
serve(app, host="0.0.0.0", port=5000, threads=500)
############### Advanced ##########################
########### streaming ############################
def generate_responses(response):
for chunk in response:
yield json.dumps({"response": chunk}) + "\n"
################ ERROR HANDLING #####################
# implement model fallbacks, cooldowns, and retries
# if a model fails assume it was rate limited and let it cooldown for 60s
@ -82,26 +44,6 @@ def handle_error(data):
############ Caching ###################################
# make a new endpoint with caching
# This Cache is built using ChromaDB
# it has two functions add_cache() and get_cache()
@app.route('/chat/completions', methods=["POST"])
def api_completion_with_cache():
data = request.json
try:
cache_response = get_cache(data['messages'])
if cache_response!=None:
return cache_response
# pass in data to completion function, unpack data
response = completion(**data)
# add to cache
except Exception as e:
# call handle_error function
return handle_error(data)
return response, 200
import uuid
cache_collection = None
# Add a response to the cache
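
The comments above describe the intended error-handling strategy: fall back to other models and treat a failure as a rate limit with a 60-second cooldown. The actual `handle_error` body is collapsed in this diff, so the snippet below is only a hypothetical illustration of that idea — the fallback list, the `last_failure` map, and the return shape are all assumptions, not the project's implementation:

```python
# Hypothetical illustration of the fallback/cooldown idea described in the comments
# above; none of these names or values come from the repo's actual handle_error.
import time
from litellm import completion

fallback_models = ["gpt-3.5-turbo", "claude-instant-1", "command-nightly"]
last_failure = {}  # model name -> timestamp of the most recent failure


def handle_error(data):
    failed_model = data.get("model")
    for model in fallback_models:
        if model == failed_model:
            continue  # this model already failed for the current request
        if time.time() - last_failure.get(model, 0) < 60:
            continue  # still cooling down; assume it was rate limited
        try:
            data["model"] = model
            return completion(**data), 200
        except Exception:
            last_failure[model] = time.time()
    return {"error": "all fallback models failed"}, 500
```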

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "0.1.400"
version = "0.1.436"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"