(code quality) run ruff rule to ban unused imports (#7313)

* remove unused imports

* fix AmazonConverseConfig

* fix test

* fix import

* ruff check fixes

* test fixes

* fix testing

* fix imports
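
A minimal sketch of the kind of check this commit enables, assuming ruff's standard F401 ("unused-import") rule; the exact command and configuration used for this PR are not shown here:

    # report unused imports across the repo (F401 is ruff's unused-import rule)
    ruff check --select F401 .

    # optionally remove them automatically where the fix is safe
    ruff check --select F401 --fix .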
Ishaan Jaff 2024-12-19 12:33:42 -08:00 committed by GitHub
parent 5e344497ce
commit c7f14e936a
347 changed files with 5473 additions and 7207 deletions


@@ -1,6 +1,4 @@
from locust import HttpUser, task, between, events
import json
import time
from locust import HttpUser, task, between
class MyUser(HttpUser):
@@ -10,7 +8,7 @@ class MyUser(HttpUser):
def chat_completion(self):
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
"Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
# Include any additional headers you may need for authentication, etc.
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,423 +1,422 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "BmX0b5Ueh91v"
},
"source": [
"# LiteLLM - Azure OpenAI + OpenAI Calls\n",
"This notebook covers the following for Azure OpenAI + OpenAI:\n",
"* Completion - Quick start\n",
"* Completion - Streaming\n",
"* Completion - Azure, OpenAI in separate threads\n",
"* Completion - Stress Test 10 requests in parallel\n",
"* Completion - Azure, OpenAI in the same thread"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# LiteLLM - Azure OpenAI + OpenAI Calls\n",
"This notebook covers the following for Azure OpenAI + OpenAI:\n",
"* Completion - Quick start\n",
"* Completion - Streaming\n",
"* Completion - Azure, OpenAI in separate threads\n",
"* Completion - Stress Test 10 requests in parallel\n",
"* Completion - Azure, OpenAI in the same thread"
],
"metadata": {
"id": "BmX0b5Ueh91v"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iHq4d0dpfawS"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "mnveHO5dfcB0"
},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eo88QUdbiDIE"
},
"source": [
"## Completion - Quick start"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5OSosWNCfc_2",
"outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
},
"outputs": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iHq4d0dpfawS"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "code",
"source": [
"import os, litellm"
],
"metadata": {
"id": "mnveHO5dfcB0"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Quick start"
],
"metadata": {
"id": "eo88QUdbiDIE"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5OSosWNCfc_2",
"outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Openai Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVOEKCPw2KdkfIaM3Ao1tIXp8EM\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708958,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 26,\n",
" \"total_tokens\": 39\n",
" }\n",
"}\n",
"Azure Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVQ6m2R2HRtnKHRRFp6JzL4Fjez\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708960,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello there! As an AI language model, I don't have feelings but I'm functioning well. How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 27,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 41\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Completion - Streaming"
],
"metadata": {
"id": "dQMkM-diiKdE"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"OpenAI Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"Azure Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n"
],
"metadata": {
"id": "uVvJDVn4g1i1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in separate threads"
],
"metadata": {
"id": "4xrOPnt-oqwm"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create threads for making the completions\n",
"thread1 = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\", messages))\n",
"thread2 = threading.Thread(target=make_completion, args=(\"azure/your-azure-deployment\", messages))\n",
"\n",
"# Start both threads\n",
"thread1.start()\n",
"thread2.start()\n",
"\n",
"# Wait for both threads to finish\n",
"thread1.join()\n",
"thread2.join()\n",
"\n",
"print(\"Both completions are done.\")"
],
"metadata": {
"id": "V5b5taJPjvC3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Stress Test 10 requests in parallel\n",
"\n"
],
"metadata": {
"id": "lx8DbMBqoAoN"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# Set your API keys\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create and start 10 threads for making completions\n",
"threads = []\n",
"for i in range(10):\n",
" thread = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\" if i % 2 == 0 else \"azure/your-azure-deployment\", messages))\n",
" threads.append(thread)\n",
" thread.start()\n",
"\n",
"# Wait for all threads to finish\n",
"for thread in threads:\n",
" thread.join()\n",
"\n",
"print(\"All completions are done.\")\n"
],
"metadata": {
"id": "pHYANOlOkoDh"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in the same thread"
],
"metadata": {
"id": "yB2NDOO4oxrp"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HTBqwzxpnxab",
"outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
},
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710847,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 29,\n",
" \"total_tokens\": 42\n",
" }\n",
"}\n",
"Azure OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjztAQ0gK6IMQt7cvLroMSOoXkeu\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710849,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"As an AI language model, I don't have feelings but I'm functioning properly. Thank you for asking! How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 29,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 43\n",
" }\n",
"}\n"
]
}
]
"name": "stdout",
"output_type": "stream",
"text": [
"Openai Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVOEKCPw2KdkfIaM3Ao1tIXp8EM\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708958,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 26,\n",
" \"total_tokens\": 39\n",
" }\n",
"}\n",
"Azure Response\n",
"\n",
"{\n",
" \"id\": \"chatcmpl-7yjVQ6m2R2HRtnKHRRFp6JzL4Fjez\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694708960,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello there! As an AI language model, I don't have feelings but I'm functioning well. How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 27,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 41\n",
" }\n",
"}\n"
]
}
]
],
"source": [
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dQMkM-diiKdE"
},
"source": [
"## Completion - Streaming"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uVvJDVn4g1i1"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"OpenAI Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" stream=True\n",
")\n",
"print(\"Azure Streaming response\")\n",
"for chunk in response:\n",
" print(chunk)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4xrOPnt-oqwm"
},
"source": [
"## Completion - Azure, OpenAI in separate threads"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "V5b5taJPjvC3"
},
"outputs": [],
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create threads for making the completions\n",
"thread1 = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\", messages))\n",
"thread2 = threading.Thread(target=make_completion, args=(\"azure/your-azure-deployment\", messages))\n",
"\n",
"# Start both threads\n",
"thread1.start()\n",
"thread2.start()\n",
"\n",
"# Wait for both threads to finish\n",
"thread1.join()\n",
"thread2.join()\n",
"\n",
"print(\"Both completions are done.\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lx8DbMBqoAoN"
},
"source": [
"## Completion - Stress Test 10 requests in parallel\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pHYANOlOkoDh"
},
"outputs": [],
"source": [
"import os\n",
"import threading\n",
"from litellm import completion\n",
"\n",
"# Function to make a completion call\n",
"def make_completion(model, messages):\n",
" response = completion(\n",
" model=model,\n",
" messages=messages\n",
" )\n",
"\n",
" print(f\"Response for {model}: {response}\")\n",
"\n",
"# Set your API keys\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"# Define the messages for the completions\n",
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
"\n",
"# Create and start 10 threads for making completions\n",
"threads = []\n",
"for i in range(10):\n",
" thread = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\" if i % 2 == 0 else \"azure/your-azure-deployment\", messages))\n",
" threads.append(thread)\n",
" thread.start()\n",
"\n",
"# Wait for all threads to finish\n",
"for thread in threads:\n",
" thread.join()\n",
"\n",
"print(\"All completions are done.\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "yB2NDOO4oxrp"
},
"source": [
"## Completion - Azure, OpenAI in the same thread"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HTBqwzxpnxab",
"outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710847,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 29,\n",
" \"total_tokens\": 42\n",
" }\n",
"}\n",
"Azure OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjztAQ0gK6IMQt7cvLroMSOoXkeu\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1694710849,\n",
" \"model\": \"gpt-35-turbo\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"finish_reason\": \"stop\",\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"As an AI language model, I don't have feelings but I'm functioning properly. Thank you for asking! How can I assist you today?\"\n",
" }\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"completion_tokens\": 29,\n",
" \"prompt_tokens\": 14,\n",
" \"total_tokens\": 43\n",
" }\n",
"}\n"
]
}
],
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long


@@ -1,166 +1,163 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "MbLbs1tbISk-"
},
"source": [
"# LiteLLM Batch Completions Example\n",
"\n",
"* This tutorial walks through using `batch_completion`\n",
"* Docs: https://docs.litellm.ai/docs/completion/batching"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# LiteLLM Batch Completions Example\n",
"\n",
"* This tutorial walks through using `batch_completion`\n",
"* Docs: https://docs.litellm.ai/docs/completion/batching"
],
"metadata": {
"id": "MbLbs1tbISk-"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ty6-ko_aDlPF"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KGhNJRUCIh1j"
},
"source": [
"## Import Batch Completion"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "LOtI43snDrSK"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import batch_completion\n",
"\n",
"# set your API_KEY\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Xhv92NBaIpaw"
},
"source": [
"## Calling `litellm.batch_completion`\n",
"\n",
"In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yY7GIRLsDywu",
"outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb"
},
"outputs": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ty6-ko_aDlPF"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"source": [
"## Import Batch Completion"
],
"metadata": {
"id": "KGhNJRUCIh1j"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import os\n",
"from litellm import batch_completion\n",
"\n",
"# set your API_KEY\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\""
],
"metadata": {
"id": "LOtI43snDrSK"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling `litellm.batch_completion`\n",
"\n",
"In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call."
],
"metadata": {
"id": "Xhv92NBaIpaw"
}
},
{
"cell_type": "code",
"source": [
"import litellm\n",
"import os\n",
"from litellm import batch_completion\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yY7GIRLsDywu",
"outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[<ModelResponse at 0x7a164eed4450> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" Good morning!\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030351.309254,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 11,\n",
" \"completion_tokens\": 3,\n",
" \"total_tokens\": 14\n",
" }\n",
" },\n",
" <ModelResponse at 0x7a164eed5800> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm an AI assistant created by Anthropic. I don't actually have a concept of the current time.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030352.1215081,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 22,\n",
" \"total_tokens\": 35\n",
" }\n",
" }]"
]
},
"metadata": {},
"execution_count": 11
}
"data": {
"text/plain": [
"[<ModelResponse at 0x7a164eed4450> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" Good morning!\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030351.309254,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 11,\n",
" \"completion_tokens\": 3,\n",
" \"total_tokens\": 14\n",
" }\n",
" },\n",
" <ModelResponse at 0x7a164eed5800> JSON: {\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \" I'm an AI assistant created by Anthropic. I don't actually have a concept of the current time.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"created\": 1694030352.1215081,\n",
" \"model\": \"claude-2\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 13,\n",
" \"completion_tokens\": 22,\n",
" \"total_tokens\": 35\n",
" }\n",
" }]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"import os\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}


@@ -1,204 +1,205 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "680oRk1af-xJ"
},
"source": [
"# Environment Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "X7TgJFn8f88p"
},
"outputs": [],
"source": [
"import csv\n",
"from typing import Optional\n",
"import httpx, json\n",
"import asyncio\n",
"\n",
"proxy_base_url = \"http://0.0.0.0:4000\" # 👈 SET TO PROXY URL\n",
"master_key = \"sk-1234\" # 👈 SET TO PROXY MASTER KEY"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rauw8EOhgBz5"
},
"outputs": [],
"source": [
"## GLOBAL HTTP CLIENT ## - faster http calls\n",
"class HTTPHandler:\n",
" def __init__(self, concurrent_limit=1000):\n",
" # Create a client with a connection pool\n",
" self.client = httpx.AsyncClient(\n",
" limits=httpx.Limits(\n",
" max_connections=concurrent_limit,\n",
" max_keepalive_connections=concurrent_limit,\n",
" )\n",
" )\n",
"\n",
" async def close(self):\n",
" # Close the client when you're done with it\n",
" await self.client.aclose()\n",
"\n",
" async def get(\n",
" self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None\n",
" ):\n",
" response = await self.client.get(url, params=params, headers=headers)\n",
" return response\n",
"\n",
" async def post(\n",
" self,\n",
" url: str,\n",
" data: Optional[dict] = None,\n",
" params: Optional[dict] = None,\n",
" headers: Optional[dict] = None,\n",
" ):\n",
" try:\n",
" response = await self.client.post(\n",
" url, data=data, params=params, headers=headers\n",
" )\n",
" return response\n",
" except Exception as e:\n",
" raise e\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7LXN8zaLgOie"
},
"source": [
"# Import Sheet\n",
"\n",
"\n",
"Format: | ID | Name | Max Budget |"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oiED0usegPGf"
},
"outputs": [],
"source": [
"async def import_sheet():\n",
" tasks = []\n",
" http_client = HTTPHandler()\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for row in csv_reader:\n",
" task = create_user(client=http_client, user_id=row['ID'], max_budget=row['Max Budget'], user_name=row['Name'])\n",
" tasks.append(task)\n",
" # print(f\"ID: {row['ID']}, Name: {row['Name']}, Max Budget: {row['Max Budget']}\")\n",
"\n",
" keys = await asyncio.gather(*tasks)\n",
"\n",
" with open('my-batch-sheet_new.csv', 'w', newline='') as new_file:\n",
" fieldnames = ['ID', 'Name', 'Max Budget', 'keys']\n",
" csv_writer = csv.DictWriter(new_file, fieldnames=fieldnames)\n",
" csv_writer.writeheader()\n",
"\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for i, row in enumerate(csv_reader):\n",
" row['keys'] = keys[i] # Add the 'keys' value from the corresponding task result\n",
" csv_writer.writerow(row)\n",
"\n",
" await http_client.close()\n",
"\n",
"asyncio.run(import_sheet())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "E7M0Li_UgJeZ"
},
"source": [
"# Create Users + Keys\n",
"\n",
"- Creates a user\n",
"- Creates a key with max budget"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZudRFujf7j-"
},
"outputs": [],
"source": [
"\n",
"async def create_key_with_alias(client: HTTPHandler, user_id: str, max_budget: float):\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"key/generate\"\n",
"\n",
" # call /key/generate\n",
" print(\"CALLING /KEY/GENERATE\")\n",
" response = await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"key_alias\": f\"{user_id}-key\",\n",
" \"max_budget\": max_budget # 👈 KEY CHANGE: SETS MAX BUDGET PER KEY\n",
" })\n",
" )\n",
" print(f\"response: {response.text}\")\n",
" return response.json()[\"key\"]\n",
"\n",
"async def create_user(client: HTTPHandler, user_id: str, max_budget: float, user_name: str):\n",
" \"\"\"\n",
" - call /user/new\n",
" - create key for user\n",
" \"\"\"\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"user/new\"\n",
"\n",
" # call /user/new\n",
" await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"user_alias\": user_name,\n",
" \"auto_create_key\": False,\n",
" # \"max_budget\": max_budget # 👈 [OPTIONAL] Sets max budget per user (if you want to set a max budget across keys)\n",
" })\n",
" )\n",
"\n",
" # create key for user\n",
" return await create_key_with_alias(client=client, user_id=user_id, max_budget=max_budget)\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "680oRk1af-xJ"
},
"source": [
"# Environment Setup"
]
},
"nbformat": 4,
"nbformat_minor": 0
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "X7TgJFn8f88p"
},
"outputs": [],
"source": [
"import csv\n",
"from typing import Optional\n",
"import httpx\n",
"import json\n",
"import asyncio\n",
"\n",
"proxy_base_url = \"http://0.0.0.0:4000\" # 👈 SET TO PROXY URL\n",
"master_key = \"sk-1234\" # 👈 SET TO PROXY MASTER KEY"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rauw8EOhgBz5"
},
"outputs": [],
"source": [
"## GLOBAL HTTP CLIENT ## - faster http calls\n",
"class HTTPHandler:\n",
" def __init__(self, concurrent_limit=1000):\n",
" # Create a client with a connection pool\n",
" self.client = httpx.AsyncClient(\n",
" limits=httpx.Limits(\n",
" max_connections=concurrent_limit,\n",
" max_keepalive_connections=concurrent_limit,\n",
" )\n",
" )\n",
"\n",
" async def close(self):\n",
" # Close the client when you're done with it\n",
" await self.client.aclose()\n",
"\n",
" async def get(\n",
" self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None\n",
" ):\n",
" response = await self.client.get(url, params=params, headers=headers)\n",
" return response\n",
"\n",
" async def post(\n",
" self,\n",
" url: str,\n",
" data: Optional[dict] = None,\n",
" params: Optional[dict] = None,\n",
" headers: Optional[dict] = None,\n",
" ):\n",
" try:\n",
" response = await self.client.post(\n",
" url, data=data, params=params, headers=headers\n",
" )\n",
" return response\n",
" except Exception as e:\n",
" raise e\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7LXN8zaLgOie"
},
"source": [
"# Import Sheet\n",
"\n",
"\n",
"Format: | ID | Name | Max Budget |"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oiED0usegPGf"
},
"outputs": [],
"source": [
"async def import_sheet():\n",
" tasks = []\n",
" http_client = HTTPHandler()\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for row in csv_reader:\n",
" task = create_user(client=http_client, user_id=row['ID'], max_budget=row['Max Budget'], user_name=row['Name'])\n",
" tasks.append(task)\n",
" # print(f\"ID: {row['ID']}, Name: {row['Name']}, Max Budget: {row['Max Budget']}\")\n",
"\n",
" keys = await asyncio.gather(*tasks)\n",
"\n",
" with open('my-batch-sheet_new.csv', 'w', newline='') as new_file:\n",
" fieldnames = ['ID', 'Name', 'Max Budget', 'keys']\n",
" csv_writer = csv.DictWriter(new_file, fieldnames=fieldnames)\n",
" csv_writer.writeheader()\n",
"\n",
" with open('my-batch-sheet.csv', 'r') as file:\n",
" csv_reader = csv.DictReader(file)\n",
" for i, row in enumerate(csv_reader):\n",
" row['keys'] = keys[i] # Add the 'keys' value from the corresponding task result\n",
" csv_writer.writerow(row)\n",
"\n",
" await http_client.close()\n",
"\n",
"asyncio.run(import_sheet())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "E7M0Li_UgJeZ"
},
"source": [
"# Create Users + Keys\n",
"\n",
"- Creates a user\n",
"- Creates a key with max budget"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZudRFujf7j-"
},
"outputs": [],
"source": [
"\n",
"async def create_key_with_alias(client: HTTPHandler, user_id: str, max_budget: float):\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"key/generate\"\n",
"\n",
" # call /key/generate\n",
" print(\"CALLING /KEY/GENERATE\")\n",
" response = await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"key_alias\": f\"{user_id}-key\",\n",
" \"max_budget\": max_budget # 👈 KEY CHANGE: SETS MAX BUDGET PER KEY\n",
" })\n",
" )\n",
" print(f\"response: {response.text}\")\n",
" return response.json()[\"key\"]\n",
"\n",
"async def create_user(client: HTTPHandler, user_id: str, max_budget: float, user_name: str):\n",
" \"\"\"\n",
" - call /user/new\n",
" - create key for user\n",
" \"\"\"\n",
" global proxy_base_url\n",
" if not proxy_base_url.endswith(\"/\"):\n",
" proxy_base_url += \"/\"\n",
" url = proxy_base_url + \"user/new\"\n",
"\n",
" # call /user/new\n",
" await client.post(\n",
" url=url,\n",
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
" data=json.dumps({\n",
" \"user_id\": user_id,\n",
" \"user_alias\": user_name,\n",
" \"auto_create_key\": False,\n",
" # \"max_budget\": max_budget # 👈 [OPTIONAL] Sets max budget per user (if you want to set a max budget across keys)\n",
" })\n",
" )\n",
"\n",
" # create key for user\n",
" return await create_key_with_alias(client=client, user_id=user_id, max_budget=max_budget)\n"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because it is too large


@@ -1,159 +1,157 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "eKXncoQbU_2j"
},
"source": [
"# Using Nemo-Guardrails with LiteLLM Server\n",
"\n",
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Using Nemo-Guardrails with LiteLLM Server\n",
"\n",
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
],
"metadata": {
"id": "eKXncoQbU_2j"
}
},
{
"cell_type": "markdown",
"source": [
"## Using with Bedrock\n",
"\n",
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
],
"metadata": {
"id": "ZciYaLwvuFbu"
}
},
{
"cell_type": "code",
"source": [
"pip install nemoguardrails langchain"
],
"metadata": {
"id": "vOUwGSJ2Vsy3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xXEJNxe7U0IN"
},
"outputs": [],
"source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"source": [
"## Using with TogetherAI\n",
"\n",
"1. You can either set this in the server environment:\n",
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
"\n",
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
],
"metadata": {
"id": "vz5n00qyuKjp"
}
},
{
"cell_type": "code",
"source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
],
"metadata": {
"id": "XK1sk-McuhpE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### CONFIG.YML\n",
"\n",
"save this example `config.yml` in your current directory"
],
"metadata": {
"id": "8A1KWKnzuxAS"
}
},
{
"cell_type": "code",
"source": [
"# instructions:\n",
"# - type: general\n",
"# content: |\n",
"# Below is a conversation between a bot and a user about the recent job reports.\n",
"# The bot is factual and concise. If the bot does not know the answer to a\n",
"# question, it truthfully says it does not know.\n",
"\n",
"# sample_conversation: |\n",
"# user \"Hello there!\"\n",
"# express greeting\n",
"# bot express greeting\n",
"# \"Hello! How can I assist you today?\"\n",
"# user \"What can you do for me?\"\n",
"# ask about capabilities\n",
"# bot respond about capabilities\n",
"# \"I am an AI assistant that helps answer mathematical questions. My core mathematical skills are powered by wolfram alpha.\"\n",
"# user \"What's 2+2?\"\n",
"# ask math question\n",
"# bot responds to math question\n",
"# \"2+2 is equal to 4.\"\n",
"\n",
"# models:\n",
"# - type: main\n",
"# engine: openai\n",
"# model: claude-instant-1"
],
"metadata": {
"id": "NKN1GmSvu0Cx"
},
"execution_count": null,
"outputs": []
}
]
{
"cell_type": "markdown",
"metadata": {
"id": "ZciYaLwvuFbu"
},
"source": [
"## Using with Bedrock\n",
"\n",
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "vOUwGSJ2Vsy3"
},
"outputs": [],
"source": [
"pip install nemoguardrails langchain"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xXEJNxe7U0IN"
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vz5n00qyuKjp"
},
"source": [
"## Using with TogetherAI\n",
"\n",
"1. You can either set this in the server environment:\n",
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
"\n",
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XK1sk-McuhpE"
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
"\n",
"from nemoguardrails import LLMRails, RailsConfig\n",
"\n",
"config = RailsConfig.from_path(\"./config.yml\")\n",
"app = LLMRails(config, llm=llm)\n",
"\n",
"new_message = app.generate(messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n",
"}])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8A1KWKnzuxAS"
},
"source": [
"### CONFIG.YML\n",
"\n",
"save this example `config.yml` in your current directory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NKN1GmSvu0Cx"
},
"outputs": [],
"source": [
"# instructions:\n",
"# - type: general\n",
"# content: |\n",
"# Below is a conversation between a bot and a user about the recent job reports.\n",
"# The bot is factual and concise. If the bot does not know the answer to a\n",
"# question, it truthfully says it does not know.\n",
"\n",
"# sample_conversation: |\n",
"# user \"Hello there!\"\n",
"# express greeting\n",
"# bot express greeting\n",
"# \"Hello! How can I assist you today?\"\n",
"# user \"What can you do for me?\"\n",
"# ask about capabilities\n",
"# bot respond about capabilities\n",
"# \"I am an AI assistant that helps answer mathematical questions. My core mathematical skills are powered by wolfram alpha.\"\n",
"# user \"What's 2+2?\"\n",
"# ask math question\n",
"# bot responds to math question\n",
"# \"2+2 is equal to 4.\"\n",
"\n",
"# models:\n",
"# - type: main\n",
"# engine: openai\n",
"# model: claude-instant-1"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}


@@ -1,16 +1,12 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import litellm
from litellm import embedding, completion, completion_cost
from autoevals.llm import *
###################
import litellm
# litellm completion call
question = "which country has the highest population"


@@ -1,11 +1,12 @@
import traceback
from flask import Flask, request, jsonify, abort, Response
from flask import Flask, request, Response
from flask_cors import CORS
import traceback
import litellm
from util import handle_error
from litellm import completion
import os, dotenv, time
import os
import dotenv
import time
import json
dotenv.load_dotenv()
@@ -20,9 +21,9 @@ verbose = True
# litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
######### PROMPT LOGGING ##########
os.environ[
"PROMPTLAYER_API_KEY"
] = "" # set your promptlayer key here - https://promptlayer.com/
os.environ["PROMPTLAYER_API_KEY"] = (
"" # set your promptlayer key here - https://promptlayer.com/
)
# set callbacks
litellm.success_callback = ["promptlayer"]
@@ -57,9 +58,9 @@ def api_completion():
try:
if "prompt" not in data:
raise ValueError("data needs to have prompt")
data[
"model"
] = "togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
data["model"] = (
"togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
)
# COMPLETION CALL
system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
messages = [
@@ -75,7 +76,7 @@ def api_completion():
"stream" in data and data["stream"] == True
): # use generate_responses to stream responses
return Response(data_generator(response), mimetype="text/event-stream")
except Exception as e:
except Exception:
# call handle_error function
print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
## LOG FAILURE


@@ -1,5 +1,4 @@
import requests
from urllib.parse import urlparse, parse_qs
def get_next_url(response):


@@ -1,238 +1,237 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "gZx-wHJapG5w"
},
"source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
]
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "VEukLhDzo4vw"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4STYM2OHFNlc"
},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"id": "DorpLxw1FHbC"
},
"outputs": [],
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "syF3dTdKFSQQ"
},
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"outputs": [
{
"cell_type": "markdown",
"source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
],
"metadata": {
"id": "gZx-wHJapG5w"
}
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"import litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "VEukLhDzo4vw"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Setup"
],
"metadata": {
"id": "4STYM2OHFNlc"
}
},
{
"cell_type": "code",
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
],
"metadata": {
"id": "DorpLxw1FHbC"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "syF3dTdKFSQQ"
}
},
{
"cell_type": "code",
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "7n21UroEGCGa"
}
},
{
"cell_type": "code",
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "6-TFwmPAGPXq"
}
},
{
"cell_type": "code",
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"metadata": {},
"execution_count": 20
}
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7n21UroEGCGa"
},
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6-TFwmPAGPXq"
},
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
View file
@ -1,201 +1,195 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "5hwntUxTMxEk"
},
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"\n",
"Call all LLM models using the same I/O interface\n",
"\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
],
"metadata": {
"id": "5hwntUxTMxEk"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "MOhRaVnhB-0J"
},
"outputs": [],
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.schema import HumanMessage"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"outputs": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": {
"id": "MOhRaVnhB-0J"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
}
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
]
],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
View file
@ -43,7 +43,7 @@
"source": [
"# set you Vertex AI configs\n",
"import litellm\n",
"from litellm import embedding, completion\n",
"from litellm import completion\n",
"\n",
"litellm.vertex_project = \"hardy-device-386718\"\n",
"litellm.vertex_location = \"us-central1\""
View file
@ -1,331 +1,331 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "vnvlwUDZK7VA"
},
"source": [
"## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n",
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`"
]
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n",
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`"
],
"metadata": {
"id": "vnvlwUDZK7VA"
}
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KrINCwRfLgZV"
},
"outputs": [],
"source": [
"## Install liteLLM\n",
"!pip install litellm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "nK7zR5OgLlh2"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"id": "dCQlyBxKLqbA"
},
"outputs": [],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "gfdGv-FMRCdX"
},
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "ERzsP1sfM19C"
},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NX6by2VuRPnp"
},
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"outputs": [
{
"cell_type": "code",
"source": [
"## Install liteLLM\n",
"!pip install litellm"
],
"metadata": {
"id": "KrINCwRfLgZV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import os, litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "nK7zR5OgLlh2"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
],
"metadata": {
"id": "dCQlyBxKLqbA"
},
"execution_count": 27,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
],
"metadata": {
"id": "gfdGv-FMRCdX"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
],
"metadata": {
"id": "ERzsP1sfM19C"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
],
"metadata": {
"id": "NX6by2VuRPnp"
}
},
{
"cell_type": "code",
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801223,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": null,\n",
" \"function_call\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
" }\n",
" },\n",
" \"finish_reason\": \"function_call\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 82,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 100\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
],
"metadata": {
"id": "Yu0o2saDNLx8"
}
},
{
"cell_type": "code",
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Call the get_current_weather() function"
],
"metadata": {
"id": "z3tstH_yN3fX"
}
},
{
"cell_type": "code",
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"execution_count": 24,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"12F\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Send the response from get_current_weather back to the model to summarize"
],
"metadata": {
"id": "k4HGJE3NRmMI"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
]
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801223,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": null,\n",
" \"function_call\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
" }\n",
" },\n",
" \"finish_reason\": \"function_call\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 82,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 100\n",
" }\n",
"}\n"
]
}
]
],
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Yu0o2saDNLx8"
},
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"outputs": [
{
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
],
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "z3tstH_yN3fX"
},
"source": [
"## Call the get_current_weather() function"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"12F\n"
]
}
],
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "k4HGJE3NRmMI"
},
"source": [
"## Send the response from get_current_weather back to the model to summarize"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
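The function-calling notebook above uses the legacy `functions` / `function_call` parameters. LiteLLM also mirrors OpenAI's newer `tools` interface, so the same first step can be written roughly as below (a sketch, not part of this diff; the model name is illustrative and assumed to support tool calling):

```python
import json

from litellm import completion

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]

messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
response = completion(model="gpt-4o-mini", messages=messages, tools=tools, tool_choice="auto")

# the model responds with a structured tool call instead of plain text
tool_call = response.choices[0].message.tool_calls[0]
print(tool_call.function.name, json.loads(tool_call.function.arguments))
```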
View file
@ -1,13 +1,13 @@
import openai
api_base = f"http://0.0.0.0:8000"
api_base = "http://0.0.0.0:8000"
openai.api_base = api_base
openai.api_key = "temp-key"
print(openai.api_base)
print(f"LiteLLM: response from proxy with streaming")
print("LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create(
model="ollama/llama2",
messages=[
File diff suppressed because one or more lines are too long
View file
@ -1,52 +1,51 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "j6yJsCGeaq8G"
},
"outputs": [],
"source": [
"!pip install litellm"
]
},
"cells": [
{
"cell_type": "code",
"source": [
"!pip install litellm"
],
"metadata": {
"id": "j6yJsCGeaq8G"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u129iWNPaf72"
},
"outputs": [],
"source": [
"import litellm\n",
"from litellm import embedding, completion\n",
"\n",
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
"\n",
"user_message = \"Hello, how are you?\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"for model in model_fallback_list:\n",
" try:\n",
" response = completion(model=model, messages=messages)\n",
" except Exception as e:\n",
" print(f\"error occurred: {traceback.format_exc()}\")"
]
}
]
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u129iWNPaf72"
},
"outputs": [],
"source": [
"from litellm import completion\n",
"\n",
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
"\n",
"user_message = \"Hello, how are you?\"\n",
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"\n",
"for model in model_fallback_list:\n",
" try:\n",
" response = completion(model=model, messages=messages)\n",
" except Exception:\n",
" print(f\"error occurred: {traceback.format_exc()}\")"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
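Note that the rewritten fallback cell above still calls `traceback.format_exc()` but, after the import cleanup, no cell shown here imports `traceback`. A self-contained version of the same loop needs that import (a minimal sketch, not part of this diff):

```python
import traceback

from litellm import completion

model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]

user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]

for model in model_fallback_list:
    try:
        response = completion(model=model, messages=messages)
        break  # optionally stop at the first model that answers
    except Exception:
        # traceback must be imported for this call to resolve
        print(f"error occurred: {traceback.format_exc()}")
```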
View file
@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
@ -137,7 +135,7 @@ for future in futures:
else:
failed_calls += 1
print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
View file
@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
@ -160,7 +158,7 @@ for future in futures:
else:
failed_calls += 1
print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
View file
@ -1,14 +1,12 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
from litellm import Router
import litellm
@ -132,7 +130,7 @@ for future in futures:
else:
failed_calls += 1
print(f"Load test Summary:")
print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
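The three load-test hunks above apply the same fix: an f-string with no placeholders becomes a plain string (ruff rule F541), while f-strings that actually interpolate values are left untouched. A minimal illustration of the distinction (the numbers are made up, not taken from the test scripts):

```python
concurrent_calls, successful_calls, failed_calls = 100, 97, 3  # illustrative values

print("Load test Summary:")                     # no placeholders -> plain string (F541)
print(f"Total Requests: {concurrent_calls}")    # interpolates a value -> f-string is justified
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
```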
View file
@ -1,14 +1,9 @@
from fastapi import FastAPI
import uvicorn
from memory_profiler import profile, memory_usage
from memory_profiler import profile
import os
import traceback
import asyncio
import pytest
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid
View file
@ -1,17 +1,16 @@
#### What this tests ####
from memory_profiler import profile, memory_usage
import sys, os, time
import traceback, asyncio
import pytest
from memory_profiler import profile
import sys
import os
import time
import asyncio
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid
View file
@ -1,17 +1,16 @@
#### What this tests ####
from memory_profiler import profile, memory_usage
import sys, os, time
import traceback, asyncio
import pytest
from memory_profiler import profile
import sys
import os
import time
import asyncio
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import uuid
View file
@ -1,17 +1,14 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy
load_dotenv()
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import asyncio
from litellm import Router, Timeout
from litellm import Timeout
import time
from litellm.caching.caching import Cache
import litellm
import openai
### Test just calling AsyncAzureOpenAI
View file
@ -1,7 +1,6 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy
load_dotenv()
sys.path.insert(
View file
@ -1,7 +1,6 @@
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
import copy
load_dotenv()
sys.path.insert(
View file
@ -1,5 +1,4 @@
import requests
import json
def get_initial_config():
View file
@ -36,7 +36,7 @@ def migrate_models(config_file, proxy_base_url):
litellm_model_name = litellm_params.get("model", "") or ""
if "vertex_ai/" in litellm_model_name:
print(f"\033[91m\nSkipping Vertex AI model\033[0m", model)
print("\033[91m\nSkipping Vertex AI model\033[0m", model)
continue
for param, value in litellm_params.items():
View file
@ -1,7 +1,6 @@
import os
from openai import OpenAI
from dotenv import load_dotenv
import httpx
import concurrent.futures
load_dotenv()
View file
@ -2,21 +2,16 @@
import json
import boto3
import sys, os
import traceback
import sys
import os
from dotenv import load_dotenv
load_dotenv()
import os, io
import io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
import io
import json
class TokenIterator:
@ -48,7 +43,6 @@ payload = {
"stream": True,
}
import boto3
client = boto3.client("sagemaker-runtime", region_name="us-west-2")
response = client.invoke_endpoint_with_response_stream(
View file
@ -111,7 +111,6 @@
},
"outputs": [],
"source": [
"import mlflow\n",
"mlflow.langchain.autolog()"
]
},
View file
@ -3,7 +3,6 @@ python script to pre-create all views required by LiteLLM Proxy Server
"""
import asyncio
import os
# Enter your DATABASE_URL here
@ -33,7 +32,7 @@ async def check_view_exists(): # noqa: PLR0915
# Try to select one row from the view
await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""")
print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e:
except Exception:
# If an error occurs, the view does not exist, so create it
await db.execute_raw(
"""
@ -54,7 +53,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT
@ -74,7 +73,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""")
print("Last30dKeysBySpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS
SELECT
@ -102,7 +101,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""")
print("Last30dModelsBySpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS
SELECT
@ -124,7 +123,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""")
print("MonthlyGlobalSpendPerKey Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS
SELECT
@ -147,7 +146,7 @@ async def check_view_exists(): # noqa: PLR0915
"""SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1"""
)
print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS
SELECT
@ -171,7 +170,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""")
print("DailyTagSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE OR REPLACE VIEW DailyTagSpend AS
SELECT
@ -189,7 +188,7 @@ async def check_view_exists(): # noqa: PLR0915
try:
await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""")
print("Last30dTopEndUsersSpend Exists!") # noqa
except Exception as e:
except Exception:
sql_query = """
CREATE VIEW "Last30dTopEndUsersSpend" AS
SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend
View file
@ -17,7 +17,7 @@ async def log_event(request: Request):
# For now, just printing the received data
return {"message": "Request received successfully"}
except Exception as e:
except Exception:
raise HTTPException(status_code=500, detail="Internal Server Error")
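Several hunks in this commit, including the one above, drop an `as e` binding that is never read; the variable is kept only where the handler actually uses the exception. A small sketch of the two idioms (the parser functions here are hypothetical, not from the repo):

```python
import json

from fastapi import HTTPException


def parse_event(raw: str) -> dict:
    try:
        return json.loads(raw)
    except Exception:  # the exception object is unused, so no "as e" binding
        raise HTTPException(status_code=400, detail="Internal Server Error")


def parse_event_verbose(raw: str) -> dict:
    try:
        return json.loads(raw)
    except Exception as e:  # keep "as e" only when the exception is referenced
        raise HTTPException(status_code=400, detail=f"Invalid payload: {e}")
```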
View file
@ -2,12 +2,10 @@
#### What this does ####
# On success, logs events to Promptlayer
import dotenv, os
import os
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching.caching import DualCache
from typing import Literal, Union, Optional
from typing import Optional
import traceback
@ -15,10 +13,8 @@ import traceback
#### What this does ####
# On success + failure, log events to Supabase
import dotenv, os
import traceback
import datetime, subprocess, sys
import litellm, uuid
import litellm
import uuid
from litellm._logging import print_verbose, verbose_logger
View file
@ -11,9 +11,9 @@ import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union, Any
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Optional, Literal, Any
import litellm
import sys
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_guardrail import CustomGuardrail
from fastapi import HTTPException
@ -23,14 +23,10 @@ from litellm.litellm_core_utils.logging_utils import (
convert_litellm_response_object_to_str,
)
from typing import List
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
import httpx
import json
from litellm.types.guardrails import GuardrailEventHooks
@ -147,7 +143,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header,
)
from litellm.types.guardrails import GuardrailEventHooks
"""
Use this for the post call moderation with Guardrails
@ -183,7 +178,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header,
)
from litellm.types.guardrails import GuardrailEventHooks
event_type: GuardrailEventHooks = GuardrailEventHooks.during_call
if self.should_run_guardrail(data=data, event_type=event_type) is not True:
View file
@ -7,14 +7,13 @@
## Reject a call / response if it contains certain keywords
from typing import Optional, Literal
from typing import Literal
import litellm
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BannedKeywords(CustomLogger):
@ -73,7 +72,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
- check if user id part of call
- check if user id part of blocked list
"""
self.print_verbose(f"Inside Banned Keyword List Pre-Call Hook")
self.print_verbose("Inside Banned Keyword List Pre-Call Hook")
if call_type == "completion" and "messages" in data:
for m in data["messages"]:
if "content" in m and isinstance(m["content"], str):
View file
@ -15,7 +15,6 @@ from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BlockedUserList(CustomLogger):
@ -69,7 +68,7 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
- check if end-user in cache
- check if end-user in db
"""
self.print_verbose(f"Inside Blocked User List Pre-Call Hook")
self.print_verbose("Inside Blocked User List Pre-Call Hook")
if "user_id" in data or "user" in data:
user = data.get("user_id", data.get("user", ""))
if (
View file
@ -7,21 +7,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Literal
import litellm
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
class _ENTERPRISE_GoogleTextModeration(CustomLogger):
View file
@ -7,28 +7,24 @@
# +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os
import sys
import os
from collections.abc import Iterable
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Optional, Literal
import litellm
import sys
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.types.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
Choices,
)
from datetime import datetime
import aiohttp, asyncio
litellm.set_verbose = True
View file
@ -7,26 +7,13 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan
## This provides an LLM Guard Integration for content moderation on the proxy
from typing import Optional, Literal, Union
from typing import Optional, Literal
import litellm
import traceback
import sys
import uuid
import os
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp
import asyncio
from litellm.utils import get_formatted_prompt
from litellm.secret_managers.main import get_secret_str
@ -164,7 +151,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
"moderation",
"audio_transcription",
]
except Exception as e:
except Exception:
self.print_verbose(
f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']"
)
View file
@ -5,27 +5,19 @@
# +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os
import sys
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching.caching import DualCache
from typing import Literal
import litellm
import sys
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
litellm.set_verbose = True
View file
@ -471,8 +471,6 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
data: dict,
call_type: str, # "completion", "embeddings", "image_generation", "moderation"
):
from detect_secrets import SecretsCollection
from detect_secrets.settings import default_settings
if await self.should_run_check(user_api_key_dict) is False:
return
View file
@ -1,6 +1,5 @@
# Enterprise Proxy Util Endpoints
from typing import Optional, List
from litellm._logging import verbose_logger
from litellm.proxy.proxy_server import PrismaClient, HTTPException
from litellm.llms.custom_httpx.http_handler import HTTPHandler
import collections
@ -116,7 +115,7 @@ async def ui_get_spend_by_tags(
def _forecast_daily_cost(data: list):
from datetime import datetime, timedelta
from datetime import timedelta
if len(data) == 0:
return {
View file
@ -1063,9 +1063,9 @@ from .llms.sagemaker.chat.transformation import SagemakerChatConfig
from .llms.ollama_chat import OllamaChatConfig
from .llms.bedrock.chat.invoke_handler import (
AmazonCohereChatConfig,
AmazonConverseConfig,
bedrock_tool_name_mappings,
)
from .llms.bedrock.chat.converse_transformation import AmazonConverseConfig
from .llms.bedrock.common_utils import (
AmazonTitanConfig,
AmazonAI21Config,
View file
@ -1,7 +1,6 @@
import json
import logging
import os
import traceback
from datetime import datetime
from logging import Formatter
View file
@ -12,12 +12,11 @@ import json
# s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
import os
from typing import Dict, List, Optional, Union
from typing import List, Optional, Union
import redis # type: ignore
import redis.asyncio as async_redis # type: ignore
import litellm
from litellm import get_secret, get_secret_str
from ._logging import verbose_logger
View file
@ -1,23 +1,12 @@
# What is this?
## Translates OpenAI call to Anthropic `/v1/messages` format
import json
import os
import traceback
import uuid
from typing import Any, Literal, Optional
import dotenv
import httpx
from pydantic import BaseModel
from typing import Any, Optional
import litellm
from litellm import ChatCompletionRequest, verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.anthropic import (
AnthropicMessagesRequest,
AnthropicResponse,
ContentBlockDelta,
)
from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse
View file
@ -7,12 +7,11 @@ from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union
import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
from openai import AsyncOpenAI, OpenAI
from openai.types.beta.assistant import Assistant
from openai.types.beta.assistant_deleted import AssistantDeleted
import litellm
from litellm.llms.azure import assistants
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import (
exception_type,
View file
@ -144,7 +144,6 @@ def batch_completion_models(*args, **kwargs):
This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
It sends requests concurrently and returns the response from the first model that responds.
"""
import concurrent
if "model" in kwargs:
kwargs.pop("model")
View file
@ -19,24 +19,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union
import httpx
import litellm
from litellm import client
from litellm.llms.azure.azure import AzureBatchesAPI
from litellm.llms.openai.openai import OpenAIBatchesAPI
from litellm.llms.vertex_ai.batches.handler import (
VertexAIBatchPrediction,
)
from litellm.secret_managers.main import get_secret, get_secret_str
from litellm.types.llms.openai import (
Batch,
CancelBatchRequest,
CreateBatchRequest,
CreateFileRequest,
FileContentRequest,
FileObject,
FileTypes,
HttpxBinaryResponseContent,
RetrieveBatchRequest,
)
from litellm.llms.vertex_ai.batches.handler import VertexAIBatchPrediction
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import Batch, CreateBatchRequest, RetrieveBatchRequest
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import supports_httpx_timeout
View file
@ -11,7 +11,7 @@ import json
import os
import threading
import time
from typing import Literal, Optional, Union
from typing import Literal, Optional
import litellm
from litellm.utils import ModelResponse
View file
@ -8,16 +8,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan
import ast
import asyncio
import hashlib
import inspect
import io
import json
import logging
import time
import traceback
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
from typing import Any, Dict, List, Optional, Set, Union
from openai.types.audio.transcription_create_params import TranscriptionCreateParams
from openai.types.chat.completion_create_params import (
@ -41,7 +37,7 @@ from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache
from .disk_cache import DiskCache
from .dual_cache import DualCache
from .dual_cache import DualCache # noqa
from .in_memory_cache import InMemoryCache
from .qdrant_semantic_cache import QdrantSemanticCache
from .redis_cache import RedisCache
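The `# noqa` comment added in this hunk marks a deliberate re-export so the new unused-import rule (ruff F401) does not flag it. A minimal sketch of the pattern the rule enforces (module and names below are illustrative, not from the repo):

```python
import json  # referenced below, so F401 is satisfied

# re-exported on purpose for downstream callers; suppress only the unused-import rule
from collections import OrderedDict as OrderedDict  # noqa: F401


def dump(payload: dict) -> str:
    # json is genuinely used here, so ruff leaves the import in place
    return json.dumps(payload, sort_keys=True)
```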
View file
@ -35,13 +35,7 @@ from pydantic import BaseModel
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.caching.caching import (
Cache,
QdrantSemanticCache,
RedisCache,
RedisSemanticCache,
S3Cache,
)
from litellm.caching.caching import S3Cache
from litellm.litellm_core_utils.logging_utils import (
_assemble_complete_response_from_streaming_chunks,
)
@ -550,12 +544,7 @@ class LLMCachingHandler:
Returns:
Optional[Any]:
"""
from litellm.utils import (
CustomStreamWrapper,
convert_to_model_response_object,
convert_to_streaming_response,
convert_to_streaming_response_async,
)
from litellm.utils import convert_to_model_response_object
if (
call_type == CallTypes.acompletion.value
View file
@ -1,8 +1,6 @@
import json
from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import print_verbose
from .base_cache import BaseCache
if TYPE_CHECKING:
View file
@ -12,7 +12,7 @@ import asyncio
import time
import traceback
from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, List, Optional
import litellm
from litellm._logging import print_verbose, verbose_logger
View file
@ -15,7 +15,6 @@ from typing import Any
import litellm
from litellm._logging import print_verbose
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache
View file
@ -13,7 +13,6 @@ import asyncio
import inspect
import json
import time
import traceback
from datetime import timedelta
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
@ -21,8 +20,7 @@ import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.types.caching import RedisPipelineIncrementOperation
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
from litellm.types.utils import all_litellm_params
from litellm.types.services import ServiceTypes
from .base_cache import BaseCache
@ -53,7 +51,6 @@ class RedisCache(BaseCache):
startup_nodes: Optional[List] = None, # for redis-cluster
**kwargs,
):
import redis
from litellm._service_logger import ServiceLogging
View file
@ -32,7 +32,6 @@ class RedisSemanticCache(BaseCache):
**kwargs,
):
from redisvl.index import SearchIndex
from redisvl.query import VectorQuery
print_verbose(
"redis semantic-cache initializing INDEX - litellm_semantic_cache_index"
@ -141,7 +140,6 @@ class RedisSemanticCache(BaseCache):
def get_cache(self, key, **kwargs):
print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery
# query
@ -253,7 +251,6 @@ class RedisSemanticCache(BaseCache):
async def async_get_cache(self, key, **kwargs):
print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery
from litellm.proxy.proxy_server import llm_model_list, llm_router
View file
@ -12,11 +12,9 @@ Has 4 methods:
import ast
import asyncio
import json
from typing import Any, Optional
from typing import Optional
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache
@ -103,7 +101,6 @@ class S3Cache(BaseCache):
self.set_cache(key=key, value=value, **kwargs)
def get_cache(self, key, **kwargs):
import boto3
import botocore
try:
View file
@ -1,7 +1,6 @@
# What is this?
## File for 'response_cost' calculation in Logging
import time
import traceback
from typing import Any, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel
@ -44,14 +43,12 @@ from litellm.llms.openai.cost_calculation import (
cost_per_second as openai_cost_per_second,
)
from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token
from litellm.llms.openai.cost_calculation import cost_router as openai_cost_router
from litellm.llms.together_ai.cost_calculator import get_model_params_and_category
from litellm.llms.vertex_ai.image_generation.cost_calculator import (
cost_calculator as vertex_ai_image_cost_calculator,
)
from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.rerank import RerankResponse
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage
from litellm.utils import (
CallTypes,
View file
@ -14,14 +14,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union, cast
import httpx
import litellm
from litellm import client, get_secret_str
from litellm import get_secret_str
from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI
from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.llms.vertex_ai.files.handler import (
VertexAIFilesHandler,
)
from litellm.llms.vertex_ai.files.handler import VertexAIFilesHandler
from litellm.types.llms.openai import (
Batch,
CreateFileRequest,
FileContentRequest,
FileTypes,
View file
@ -19,10 +19,10 @@ import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI
from litellm.llms.openai.fine_tuning.handler import OpenAIFineTuningAPI, FineTuningJob, FineTuningJobCreate
from litellm.llms.openai.fine_tuning.handler import FineTuningJob, OpenAIFineTuningAPI
from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import Hyperparameters
from litellm.types.llms.openai import FineTuningJobCreate, Hyperparameters
from litellm.types.router import *
from litellm.utils import supports_httpx_timeout
View file
@ -6,11 +6,9 @@ Slack alerts are sent every 10s or when events are greater than X events
see custom_batch_logger.py for more details / defaults
"""
import os
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
from typing import TYPE_CHECKING, Any
from litellm._logging import verbose_logger, verbose_proxy_logger
from litellm.proxy._types import AlertType, WebhookEvent
from litellm._logging import verbose_proxy_logger
if TYPE_CHECKING:
from .slack_alerting import SlackAlerting as _SlackAlerting
@ -21,7 +19,6 @@ else:
def squash_payloads(queue):
import json
squashed = {}
if len(queue) == 0:
View file
@ -4,16 +4,10 @@ import asyncio
import datetime
import os
import random
import threading
import time
import traceback
from datetime import datetime as dt
from datetime import timedelta, timezone
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, TypedDict, Union, get_args
from datetime import timedelta
from typing import Any, Dict, List, Literal, Optional, Union
import aiohttp
import dotenv
from openai import APIError
import litellm
@ -26,22 +20,13 @@ from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.litellm_core_utils.exception_mapping_utils import (
_add_key_name_and_team_to_alert,
)
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.proxy._types import (
AlertType,
CallInfo,
UserAPIKeyAuth,
VirtualKeyEvent,
WebhookEvent,
)
from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
from litellm.router import Router
from litellm.types.integrations.slack_alerting import *
from litellm.types.router import LiteLLM_Params
from ..email_templates.templates import *
from .batching_handler import send_to_webhook, squash_payloads
@ -1261,7 +1246,7 @@ Model Info:
Returns -> True if sent, False if not.
"""
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.proxy_server import premium_user
from litellm.proxy.utils import send_email
email_logo_url = os.getenv(
@ -1370,7 +1355,6 @@ Model Info:
if alert_type not in self.alert_types:
return
import json
from datetime import datetime
# Get the current timestamp
View file
@ -5,7 +5,6 @@ Utils used for slack alerting
import asyncio
from typing import Dict, List, Optional, Union
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.proxy._types import AlertType
from litellm.secret_managers.main import get_secret


@@ -6,14 +6,9 @@ import asyncio
import json
import os
import random
import time
import traceback
import types
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union
from typing import Any, Dict, List, Optional
import dotenv # type: ignore
import httpx
from pydantic import BaseModel # type: ignore
@@ -21,11 +16,7 @@ import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_content_from_model_response,
)
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)
@@ -33,7 +24,6 @@ from litellm.types.integrations.argilla import (
SUPPORTED_PAYLOAD_FIELDS,
ArgillaCredentialsObject,
ArgillaItem,
ArgillaPayload,
)
from litellm.types.utils import StandardLoggingPayload


@@ -5,7 +5,7 @@ this file has Arize ai specific helper functions
"""
import json
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import verbose_logger
@@ -30,7 +30,6 @@ class ArizeLogger:
def set_arize_ai_attributes(span: Span, kwargs, response_obj):
from litellm.integrations._types.open_inference import (
MessageAttributes,
MessageContentAttributes,
OpenInferenceSpanKindValues,
SpanAttributes,
)


@@ -3,23 +3,8 @@ import json
import os
import uuid
from datetime import datetime, timedelta
from re import S, T
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Optional,
Tuple,
TypedDict,
Union,
)
from typing import List, Optional
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger
from litellm.constants import AZURE_STORAGE_MSFT_VERSION
from litellm.integrations.custom_batch_logger import CustomBatchLogger


@@ -2,15 +2,10 @@
## Log success + failure events to Braintrust
import copy
import json
import os
import threading
import traceback
import uuid
from datetime import datetime
from typing import Literal, Optional
from typing import Optional
import dotenv
import httpx
from pydantic import BaseModel
@@ -18,12 +13,11 @@ import litellm
from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.utils import get_formatted_prompt, print_verbose
from litellm.utils import print_verbose
global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback


@@ -6,7 +6,7 @@ Use this if you want your logs to be stored in memory and flushed periodically
import asyncio
import time
from typing import List, Literal, Optional
from typing import List, Optional
import litellm
from litellm._logging import verbose_logger

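The file above implements the pattern its docstring describes: logs are buffered in memory and flushed periodically, or earlier once the batch grows large. A rough, self-contained sketch of that pattern, with placeholder defaults and a print in place of the real sink:

```python
import asyncio
from typing import List


class InMemoryBatchLoggerSketch:
    """Buffer events in memory; flush on a timer or when the batch is full."""

    def __init__(self, flush_interval_s: float = 10.0, batch_size: int = 100):
        self.log_queue: List[dict] = []
        self.flush_interval_s = flush_interval_s
        self.batch_size = batch_size

    async def log_event(self, payload: dict) -> None:
        self.log_queue.append(payload)
        if len(self.log_queue) >= self.batch_size:
            await self.flush()

    async def periodic_flush(self) -> None:
        # Run as a background task: flush whatever has accumulated every interval.
        while True:
            await asyncio.sleep(self.flush_interval_s)
            await self.flush()

    async def flush(self) -> None:
        if not self.log_queue:
            return
        batch, self.log_queue = self.log_queue, []
        # Placeholder: ship `batch` to the real sink (Slack, Datadog, etc.).
        print(f"flushing {len(batch)} events")
```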

@@ -1,4 +1,4 @@
from typing import List, Literal, Optional
from typing import List, Optional
from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger


@@ -1,18 +1,14 @@
#### What this does ####
# On success, logs events to Promptlayer
import os
import traceback
from datetime import datetime as datetimeObj
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
import dotenv
from pydantic import BaseModel
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.argilla import ArgillaItem
from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import (
AdapterCompletionStreamWrapper,
EmbeddingResponse,


@@ -16,11 +16,10 @@ For batching specific details see CustomBatchLogger class
import asyncio
import datetime
import os
import sys
import traceback
import uuid
from datetime import datetime as datetimeObj
from typing import Any, Dict, List, Optional, Union
from typing import Any, List, Optional, Union
from httpx import Response
@@ -32,7 +31,6 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import StandardLoggingPayload


@@ -8,12 +8,9 @@ API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=examp
import asyncio
import os
import traceback
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional, Union
from httpx import Response
from typing import Any, Dict, List, Optional
import litellm
from litellm._logging import verbose_logger


@@ -1,14 +1,11 @@
#### What this does ####
# On success + failure, log events to Supabase
import datetime
import os
import traceback
import uuid
from typing import Any
import dotenv
import litellm


@@ -2,7 +2,6 @@
Functions for sending Email Alerts
"""
import asyncio
import os
from typing import List, Optional
@@ -20,7 +19,7 @@ async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
)
if team_id is None:
return []
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.proxy_server import prisma_client
if prisma_client is None:
raise Exception("Not connected to DB!")
@@ -72,7 +71,6 @@ async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
Send an Email Alert to All Team Members when the Team Budget is crossed
Returns -> True if sent, False if not.
"""
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.utils import send_email
_team_id = webhook_event.team_id

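The docstring above describes sending an email alert to every team member when the team budget is crossed, returning True if anything was sent. A hedged sketch of that flow, where `fetch_team_member_emails` and `send_email` are hypothetical stand-ins for the proxy's DB lookup and email utility:

```python
from typing import List


def fetch_team_member_emails(team_id: str) -> List[str]:
    # Placeholder for the DB lookup done by get_all_team_member_emails().
    return ["admin@example.com", "dev@example.com"]


def send_email(to: str, subject: str, body: str) -> None:
    # Placeholder for the proxy's email utility.
    print(f"sending '{subject}' to {to}")


def send_team_budget_alert(team_id: str, spend: float, max_budget: float) -> bool:
    """Returns True if at least one email was sent, False otherwise."""
    emails = fetch_team_member_emails(team_id)
    if not emails:
        return False
    for email in emails:
        send_email(
            to=email,
            subject="LiteLLM: team budget crossed",
            body=f"Team {team_id} has spent {spend} of its {max_budget} budget.",
        )
    return True
```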

@@ -1,15 +1,12 @@
import os
from datetime import datetime
from typing import Any, Dict, List, Optional
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import (
_get_httpx_client,
get_async_httpx_client,
httpxSpecialProvider,
)


@@ -1,27 +1,14 @@
import asyncio
import json
import os
import uuid
from datetime import datetime
from re import S
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload
from litellm.proxy._types import CommonProxyErrors
from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import (
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
from litellm.types.utils import StandardLoggingPayload
if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase


@@ -1,13 +1,7 @@
import json
import os
import uuid
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import (
@@ -15,11 +9,7 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider,
)
from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import (
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
from litellm.types.utils import StandardCallbackDynamicParams, StandardLoggingPayload
if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
@@ -190,9 +180,7 @@ class GCSBucketBase(CustomBatchLogger):
This function is used to get the Vertex instance for the GCS Bucket Logger.
It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it.
"""
from litellm.llms.vertex_ai.vertex_llm_base import (
VertexBase,
)
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
_in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials)
if _in_memory_key not in self.vertex_instances:

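The hunk above keys `self.vertex_instances` by a value derived from the credentials and reuses the cached Vertex instance on later calls. An illustrative version of that cache-by-credentials pattern, with a made-up `VertexClient` standing in for the real handler:

```python
from typing import Dict


class VertexClient:
    """Hypothetical stand-in for the real Vertex handler."""

    def __init__(self, credentials: str):
        self.credentials = credentials


class VertexInstanceCache:
    def __init__(self) -> None:
        self.vertex_instances: Dict[str, VertexClient] = {}

    def get_vertex_instance(self, credentials: str) -> VertexClient:
        # Derive an in-memory key from the credentials; create and cache on miss.
        key = credentials or "default"
        if key not in self.vertex_instances:
            self.vertex_instances[key] = VertexClient(credentials)
        return self.vertex_instances[key]
```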

@@ -3,10 +3,7 @@
import os
import traceback
import dotenv
import litellm
from litellm._logging import verbose_logger
class HeliconeLogger:


@@ -3,11 +3,9 @@
import json
import os
import traceback
import uuid
from typing import Literal, Optional
import dotenv
import httpx
import litellm


@@ -3,7 +3,6 @@
import copy
import os
import traceback
import types
from collections.abc import MutableMapping, MutableSequence, MutableSet
from typing import TYPE_CHECKING, Any, Dict, Optional, cast


@@ -6,11 +6,8 @@ Used to get the LangFuseLogger for a given request
Handles Key/Team Based Langfuse Logging
"""
import os
from typing import TYPE_CHECKING, Any, Dict, Optional
from packaging.version import Version
from litellm.litellm_core_utils.litellm_logging import StandardCallbackDynamicParams
from .langfuse import LangFuseLogger, LangfuseLoggingConfig


@@ -3,14 +3,12 @@
import asyncio
import os
import random
import time
import traceback
import types
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union
from typing import Any, Dict, List, Optional
import dotenv # type: ignore
import httpx
from pydantic import BaseModel # type: ignore
@@ -18,7 +16,6 @@ import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,
)


@@ -1,9 +1,7 @@
import traceback
import json
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import SpanAttributes
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Optional, Union
from litellm.proxy._types import SpanAttributes
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span


@@ -3,17 +3,12 @@
import json
import os
import traceback
import uuid
import dotenv
import httpx
import litellm
from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
httpxSpecialProvider,


@@ -1,7 +1,6 @@
import os
from dataclasses import dataclass
from datetime import datetime
from functools import wraps
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
import litellm
@@ -10,10 +9,7 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import (
ChatCompletionMessageToolCall,
EmbeddingResponse,
Function,
ImageResponse,
ModelResponse,
StandardLoggingPayload,
)
@@ -139,7 +135,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[datetime, float]] = None,
event_metadata: Optional[dict] = None,
):
from datetime import datetime
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
@@ -201,7 +196,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[float, datetime]] = None,
event_metadata: Optional[dict] = None,
):
from datetime import datetime
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
@@ -666,7 +660,6 @@ class OpenTelemetry(CustomLogger):
span.set_attribute(key, primitive_value)
def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
from litellm.proxy._types import SpanAttributes
kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {}
@@ -834,7 +827,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from datetime import datetime
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
@@ -889,7 +881,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from datetime import datetime
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode


@@ -3,8 +3,6 @@ import os
import time
from typing import Dict, Final, List, Optional
from litellm.types.utils import ModelResponse
CONFIG_FILE_PATH_DEFAULT: Final[str] = "~/.opik.config"


@@ -1,15 +1,10 @@
# used for /metrics endpoint on LiteLLM Proxy
#### What this does ####
# On success, log events to Prometheus
import os
import subprocess
import sys
import traceback
import uuid
from datetime import date, datetime, timedelta
from typing import Optional, TypedDict, Union
from datetime import datetime, timedelta
from typing import Optional
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


@@ -2,13 +2,10 @@
Helper functions to query prometheus API
"""
import asyncio
import os
import time
from datetime import datetime, timedelta
from typing import Optional
import litellm
from litellm import get_secret
from litellm._logging import verbose_logger
from litellm.llms.custom_httpx.http_handler import (

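The helpers above query the Prometheus API; at its core that is an HTTP GET against Prometheus's `/api/v1/query` endpoint. A small standalone example (the base URL and the metric name are placeholders; the real helpers reuse LiteLLM's async httpx client rather than creating one inline):

```python
import asyncio

import httpx


async def query_prometheus(base_url: str, promql: str) -> dict:
    # GET {base_url}/api/v1/query?query=<promql> and return the decoded JSON body.
    async with httpx.AsyncClient() as client:
        resp = await client.get(f"{base_url}/api/v1/query", params={"query": promql})
        resp.raise_for_status()
        return resp.json()


if __name__ == "__main__":
    # Example PromQL; the metric name here is illustrative only.
    result = asyncio.run(
        query_prometheus("http://localhost:9090", "sum(litellm_requests_metric)")
    )
    print(result["status"])
```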

@@ -3,15 +3,8 @@
# On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers)
import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import List, Optional, Union
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.types.integrations.prometheus import LATENCY_BUCKETS
from litellm.types.services import ServiceLoggerPayload, ServiceTypes


@@ -1,12 +1,6 @@
#### What this does ####
# On success + failure, log events to Supabase
import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import Optional
import litellm


@@ -1,14 +1,11 @@
#### What this does ####
# On success + failure, log events to Supabase
import datetime
import os
import subprocess
import sys
import traceback
import dotenv
import litellm


@@ -1,6 +1,5 @@
import traceback
import litellm
from litellm._logging import verbose_logger
@@ -12,9 +11,7 @@ class TraceloopLogger:
def __init__(self):
try:
from opentelemetry.sdk.trace.export import ConsoleSpanExporter
from traceloop.sdk import Traceloop
from traceloop.sdk.instruments import Instruments
from traceloop.sdk.tracing.tracing import TracerWrapper
except ModuleNotFoundError as e:
verbose_logger.error(
@@ -39,7 +36,6 @@ class TraceloopLogger:
level="DEFAULT",
status_message=None,
):
from opentelemetry import trace
from opentelemetry.semconv.ai import SpanAttributes
from opentelemetry.trace import SpanKind, Status, StatusCode
@@ -78,7 +74,7 @@ class TraceloopLogger:
)
if "top_p" in optional_params:
span.set_attribute(
SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
)
if "tools" in optional_params or "functions" in optional_params:
span.set_attribute(


@@ -173,16 +173,14 @@ except Exception:
#### What this does ####
# On success, logs events to Langfuse
import os
import traceback
from datetime import datetime
class WeightsBiasesLogger:
# Class variables or attributes
def __init__(self):
try:
import wandb
pass
except Exception:
raise Exception(
"\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m"


@@ -3,7 +3,6 @@ from typing import Awaitable, Callable, Optional
import anyio
import anyio.to_thread
from anyio import to_thread
from typing_extensions import ParamSpec, TypeVar
T_ParamSpec = ParamSpec("T_ParamSpec")


@@ -1,7 +1,6 @@
# What is this?
## Helper utilities
import os
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Optional, Union
import httpx


@@ -1,6 +1,4 @@
import json
import os
import threading
import traceback
from typing import Optional
@@ -14,17 +12,14 @@ from ..exceptions import (
APIError,
AuthenticationError,
BadRequestError,
BudgetExceededError,
ContentPolicyViolationError,
ContextWindowExceededError,
NotFoundError,
OpenAIError,
PermissionDeniedError,
RateLimitError,
ServiceUnavailableError,
Timeout,
UnprocessableEntityError,
UnsupportedParamsError,
)

Some files were not shown because too many files have changed in this diff.