(code quality) run ruff rule to ban unused imports (#7313)

* remove unused imports

* fix AmazonConverseConfig

* fix test

* fix import

* ruff check fixes

* test fixes

* fix testing

* fix imports
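
For context, the rule being enforced here is Ruff's F401 (unused imports). Below is a minimal sketch of the configuration and the cleanup commands, assuming Ruff is configured through pyproject.toml; the repo's actual settings may differ.

    # pyproject.toml (sketch; actual repo config may differ)
    [tool.ruff.lint]
    extend-select = ["F401"]   # F401 = module imported but unused

    # one-time auto-fix, then a plain check for CI
    ruff check --select F401 --fix .
    ruff check .

Ruff removes most flagged imports automatically with --fix; the "fix ..." bullets above are presumably the manual follow-up for spots the auto-fix missed or broke.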
Ishaan Jaff, 2024-12-19 12:33:42 -08:00 (committed by GitHub)
parent 5e344497ce
commit c7f14e936a
347 changed files with 5473 additions and 7207 deletions

View file

@@ -1,6 +1,4 @@
-from locust import HttpUser, task, between, events
+from locust import HttpUser, task, between
-import json
-import time
 class MyUser(HttpUser):
@@ -10,7 +8,7 @@ class MyUser(HttpUser):
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
+            "Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
             # Include any additional headers you may need for authentication, etc.
         }

File diff suppressed because one or more lines are too long

View file

@@ -36,9 +36,7 @@
 },
 "outputs": [],
 "source": [
-    "import litellm\n",
-    "from litellm import load_test_model, testing_batch_completion\n",
-    "import time"
+    "from litellm import load_test_model, testing_batch_completion"
 ]
 },
 {

View file

@ -1,21 +1,10 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "BmX0b5Ueh91v"
},
"source": [ "source": [
"# LiteLLM - Azure OpenAI + OpenAI Calls\n", "# LiteLLM - Azure OpenAI + OpenAI Calls\n",
"This notebook covers the following for Azure OpenAI + OpenAI:\n", "This notebook covers the following for Azure OpenAI + OpenAI:\n",
@ -24,10 +13,7 @@
"* Completion - Azure, OpenAI in separate threads\n", "* Completion - Azure, OpenAI in separate threads\n",
"* Completion - Stress Test 10 requests in parallel\n", "* Completion - Stress Test 10 requests in parallel\n",
"* Completion - Azure, OpenAI in the same thread" "* Completion - Azure, OpenAI in the same thread"
], ]
"metadata": {
"id": "BmX0b5Ueh91v"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -42,57 +28,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 2,
"import os, litellm"
],
"metadata": { "metadata": {
"id": "mnveHO5dfcB0" "id": "mnveHO5dfcB0"
}, },
"execution_count": 2, "outputs": [],
"outputs": [] "source": [
"import os"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Completion - Quick start"
],
"metadata": { "metadata": {
"id": "eo88QUdbiDIE" "id": "eo88QUdbiDIE"
} },
"source": [
"## Completion - Quick start"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 12,
"import os\n",
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -100,11 +56,10 @@
"id": "5OSosWNCfc_2", "id": "5OSosWNCfc_2",
"outputId": "c52344b1-2458-4695-a7eb-a9b076893348" "outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
}, },
"execution_count": 12,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"Openai Response\n", "Openai Response\n",
"\n", "\n",
@ -154,19 +109,54 @@
"}\n" "}\n"
] ]
} }
],
"source": [
"from litellm import completion\n",
"\n",
"# openai configs\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
"# azure openai configs\n",
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model = \"gpt-3.5-turbo\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Openai Response\\n\")\n",
"print(response)\n",
"\n",
"\n",
"\n",
"# azure call\n",
"response = completion(\n",
" model = \"azure/your-azure-deployment\",\n",
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
")\n",
"print(\"Azure Response\\n\")\n",
"print(response)"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Completion - Streaming"
],
"metadata": { "metadata": {
"id": "dQMkM-diiKdE" "id": "dQMkM-diiKdE"
} },
"source": [
"## Completion - Streaming"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uVvJDVn4g1i1"
},
"outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"from litellm import completion\n", "from litellm import completion\n",
@ -199,24 +189,24 @@
"print(\"Azure Streaming response\")\n", "print(\"Azure Streaming response\")\n",
"for chunk in response:\n", "for chunk in response:\n",
" print(chunk)\n" " print(chunk)\n"
], ]
"metadata": {
"id": "uVvJDVn4g1i1"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in separate threads"
],
"metadata": { "metadata": {
"id": "4xrOPnt-oqwm" "id": "4xrOPnt-oqwm"
} },
"source": [
"## Completion - Azure, OpenAI in separate threads"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "V5b5taJPjvC3"
},
"outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"import threading\n", "import threading\n",
@ -255,25 +245,25 @@
"thread2.join()\n", "thread2.join()\n",
"\n", "\n",
"print(\"Both completions are done.\")" "print(\"Both completions are done.\")"
], ]
"metadata": {
"id": "V5b5taJPjvC3"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "lx8DbMBqoAoN"
},
"source": [ "source": [
"## Completion - Stress Test 10 requests in parallel\n", "## Completion - Stress Test 10 requests in parallel\n",
"\n" "\n"
], ]
"metadata": {
"id": "lx8DbMBqoAoN"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pHYANOlOkoDh"
},
"outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"import threading\n", "import threading\n",
@ -309,57 +299,20 @@
" thread.join()\n", " thread.join()\n",
"\n", "\n",
"print(\"All completions are done.\")\n" "print(\"All completions are done.\")\n"
], ]
"metadata": {
"id": "pHYANOlOkoDh"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Completion - Azure, OpenAI in the same thread"
],
"metadata": { "metadata": {
"id": "yB2NDOO4oxrp" "id": "yB2NDOO4oxrp"
} },
"source": [
"## Completion - Azure, OpenAI in the same thread"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 23,
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -367,11 +320,10 @@
"id": "HTBqwzxpnxab", "id": "HTBqwzxpnxab",
"outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14" "outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
}, },
"execution_count": 23,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"OpenAI Response: {\n", "OpenAI Response: {\n",
" \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n", " \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
@ -417,7 +369,54 @@
"}\n" "}\n"
] ]
} }
],
"source": [
"import os\n",
"from litellm import completion\n",
"\n",
"# Function to make both OpenAI and Azure completions\n",
"def make_completions():\n",
" # Set your OpenAI API key\n",
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"\n",
" # OpenAI completion\n",
" openai_response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"OpenAI Response:\", openai_response)\n",
"\n",
" # Set your Azure OpenAI API key and configuration\n",
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
"\n",
" # Azure OpenAI completion\n",
" azure_response = completion(\n",
" model=\"azure/your-azure-deployment\",\n",
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
" )\n",
"\n",
" print(\"Azure OpenAI Response:\", azure_response)\n",
"\n",
"# Call the function to make both completions in one thread\n",
"make_completions()\n"
] ]
} }
] ],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }

File diff suppressed because one or more lines are too long

View file

@ -1,30 +1,16 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "MbLbs1tbISk-"
},
"source": [ "source": [
"# LiteLLM Batch Completions Example\n", "# LiteLLM Batch Completions Example\n",
"\n", "\n",
"* This tutorial walks through using `batch_completion`\n", "* This tutorial walks through using `batch_completion`\n",
"* Docs: https://docs.litellm.ai/docs/completion/batching" "* Docs: https://docs.litellm.ai/docs/completion/batching"
], ]
"metadata": {
"id": "MbLbs1tbISk-"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -39,69 +25,42 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Import Batch Completion"
],
"metadata": { "metadata": {
"id": "KGhNJRUCIh1j" "id": "KGhNJRUCIh1j"
} },
"source": [
"## Import Batch Completion"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "LOtI43snDrSK"
},
"outputs": [],
"source": [ "source": [
"import litellm\n",
"import os\n", "import os\n",
"from litellm import batch_completion\n", "from litellm import batch_completion\n",
"\n", "\n",
"# set your API_KEY\n", "# set your API_KEY\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"" "os.environ['ANTHROPIC_API_KEY'] = \"\""
], ]
"metadata": {
"id": "LOtI43snDrSK"
},
"execution_count": 7,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "Xhv92NBaIpaw"
},
"source": [ "source": [
"## Calling `litellm.batch_completion`\n", "## Calling `litellm.batch_completion`\n",
"\n", "\n",
"In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call." "In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call."
], ]
"metadata": {
"id": "Xhv92NBaIpaw"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 11,
"import litellm\n",
"import os\n",
"from litellm import batch_completion\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -109,10 +68,8 @@
"id": "yY7GIRLsDywu", "id": "yY7GIRLsDywu",
"outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb" "outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb"
}, },
"execution_count": 11,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"[<ModelResponse at 0x7a164eed4450> JSON: {\n", "[<ModelResponse at 0x7a164eed4450> JSON: {\n",
@ -157,10 +114,50 @@
" }]" " }]"
] ]
}, },
"execution_count": 11,
"metadata": {}, "metadata": {},
"execution_count": 11 "output_type": "execute_result"
} }
],
"source": [
"import os\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"\n",
"\n",
"responses = batch_completion(\n",
" model=\"claude-2\",\n",
" messages = [\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"good morning? \"\n",
" }\n",
" ],\n",
" [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"what's the time? \"\n",
" }\n",
" ]\n",
" ]\n",
")\n",
"responses"
] ]
} }
] ],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }

View file

@@ -19,7 +19,8 @@
 "source": [
     "import csv\n",
     "from typing import Optional\n",
-    "import httpx, json\n",
+    "import httpx\n",
+    "import json\n",
     "import asyncio\n",
     "\n",
     "proxy_base_url = \"http://0.0.0.0:4000\" # 👈 SET TO PROXY URL\n",

View file

@ -14,16 +14,16 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"metadata": { "metadata": {
"id": "pc6IO4V99O25",
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
}, },
"id": "pc6IO4V99O25",
"outputId": "2d69da44-010b-41c2-b38b-5b478576bb8b" "outputId": "2d69da44-010b-41c2-b38b-5b478576bb8b"
}, },
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"Collecting litellm\n", "Collecting litellm\n",
" Downloading litellm-0.1.482-py3-none-any.whl (69 kB)\n", " Downloading litellm-0.1.482-py3-none-any.whl (69 kB)\n",
@ -134,11 +134,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 4,
"model_name = \"togethercomputer/CodeLlama-34b-Instruct\"\n",
"response = completion(model=model_name, messages=messages, max_tokens=200)\n",
"print(response)"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -146,11 +142,10 @@
"id": "GIUevHlMvPb8", "id": "GIUevHlMvPb8",
"outputId": "ad930a12-16e3-4400-fff4-38151e4f6da5" "outputId": "ad930a12-16e3-4400-fff4-38151e4f6da5"
}, },
"execution_count": 4,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"\u001b[92mHere's your LiteLLM Dashboard 👉 \u001b[94m\u001b[4mhttps://admin.litellm.ai/6c0f0403-becb-44af-9724-7201c7d381d0\u001b[0m\n", "\u001b[92mHere's your LiteLLM Dashboard 👉 \u001b[94m\u001b[4mhttps://admin.litellm.ai/6c0f0403-becb-44af-9724-7201c7d381d0\u001b[0m\n",
"{\n", "{\n",
@ -174,6 +169,11 @@
"}\n" "}\n"
] ]
} }
],
"source": [
"model_name = \"togethercomputer/CodeLlama-34b-Instruct\"\n",
"response = completion(model=model_name, messages=messages, max_tokens=200)\n",
"print(response)"
] ]
}, },
{ {
@@ -976,7 +976,6 @@
 "messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
 "\n",
 "\n",
-"import asyncio\n",
 "async def parse_stream(stream):\n",
 "    async for elem in stream:\n",
 "        print(elem)\n",

View file

@ -1,51 +1,37 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "eKXncoQbU_2j"
},
"source": [ "source": [
"# Using Nemo-Guardrails with LiteLLM Server\n", "# Using Nemo-Guardrails with LiteLLM Server\n",
"\n", "\n",
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)" "[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
], ]
"metadata": {
"id": "eKXncoQbU_2j"
}
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "ZciYaLwvuFbu"
},
"source": [ "source": [
"## Using with Bedrock\n", "## Using with Bedrock\n",
"\n", "\n",
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`" "`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
], ]
"metadata": {
"id": "ZciYaLwvuFbu"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"pip install nemoguardrails langchain"
],
"metadata": { "metadata": {
"id": "vOUwGSJ2Vsy3" "id": "vOUwGSJ2Vsy3"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"pip install nemoguardrails langchain"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -55,7 +41,6 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n", "from langchain.chat_models import ChatOpenAI\n",
"\n", "\n",
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n", "llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
@ -73,6 +58,9 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "vz5n00qyuKjp"
},
"source": [ "source": [
"## Using with TogetherAI\n", "## Using with TogetherAI\n",
"\n", "\n",
@ -80,15 +68,16 @@
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n", "`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
"\n", "\n",
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`" "2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
], ]
"metadata": {
"id": "vz5n00qyuKjp"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XK1sk-McuhpE"
},
"outputs": [],
"source": [ "source": [
"import openai\n",
"from langchain.chat_models import ChatOpenAI\n", "from langchain.chat_models import ChatOpenAI\n",
"\n", "\n",
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n", "llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
@ -102,26 +91,26 @@
" \"role\": \"user\",\n", " \"role\": \"user\",\n",
" \"content\": \"Hello! What can you do for me?\"\n", " \"content\": \"Hello! What can you do for me?\"\n",
"}])" "}])"
], ]
"metadata": {
"id": "XK1sk-McuhpE"
},
"execution_count": null,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "8A1KWKnzuxAS"
},
"source": [ "source": [
"### CONFIG.YML\n", "### CONFIG.YML\n",
"\n", "\n",
"save this example `config.yml` in your current directory" "save this example `config.yml` in your current directory"
], ]
"metadata": {
"id": "8A1KWKnzuxAS"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NKN1GmSvu0Cx"
},
"outputs": [],
"source": [ "source": [
"# instructions:\n", "# instructions:\n",
"# - type: general\n", "# - type: general\n",
@ -148,12 +137,21 @@
"# - type: main\n", "# - type: main\n",
"# engine: openai\n", "# engine: openai\n",
"# model: claude-instant-1" "# model: claude-instant-1"
]
}
], ],
"metadata": { "metadata": {
"id": "NKN1GmSvu0Cx" "colab": {
"provenance": []
}, },
"execution_count": null, "kernelspec": {
"outputs": [] "display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
} }
] },
"nbformat": 4,
"nbformat_minor": 0
} }

View file

@@ -1,16 +1,12 @@
-import sys, os
-import traceback
 from dotenv import load_dotenv
 load_dotenv()
 import litellm
-from litellm import embedding, completion, completion_cost
 from autoevals.llm import *
 ###################
-import litellm
 # litellm completion call
 question = "which country has the highest population"

View file

@@ -1,11 +1,12 @@
 import traceback
-from flask import Flask, request, jsonify, abort, Response
+from flask import Flask, request, Response
 from flask_cors import CORS
-import traceback
 import litellm
 from util import handle_error
 from litellm import completion
-import os, dotenv, time
+import os
+import dotenv
+import time
 import json
 dotenv.load_dotenv()
@@ -20,9 +21,9 @@ verbose = True
 # litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
 ######### PROMPT LOGGING ##########
-os.environ[
-    "PROMPTLAYER_API_KEY"
-] = ""  # set your promptlayer key here - https://promptlayer.com/
+os.environ["PROMPTLAYER_API_KEY"] = (
+    ""  # set your promptlayer key here - https://promptlayer.com/
+)
 # set callbacks
 litellm.success_callback = ["promptlayer"]
@@ -57,9 +58,9 @@ def api_completion():
     try:
         if "prompt" not in data:
             raise ValueError("data needs to have prompt")
-        data[
-            "model"
-        ] = "togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
+        data["model"] = (
+            "togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
+        )
         # COMPLETION CALL
         system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
         messages = [
@@ -75,7 +76,7 @@ def api_completion():
             "stream" in data and data["stream"] == True
         ):  # use generate_responses to stream responses
             return Response(data_generator(response), mimetype="text/event-stream")
-    except Exception as e:
+    except Exception:
         # call handle_error function
         print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
         ## LOG FAILURE

View file

@@ -1,5 +1,4 @@
 import requests
-from urllib.parse import urlparse, parse_qs
 
 def get_next_url(response):

View file

@ -1,21 +1,10 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "gZx-wHJapG5w"
},
"source": [ "source": [
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n", "# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"\n", "\n",
@ -30,10 +19,7 @@
"model = \"q841o8w\" # baseten model version ID\n", "model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n", "response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```" "```"
], ]
"metadata": {
"id": "gZx-wHJapG5w"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -49,55 +35,50 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 2,
"import os\n",
"import litellm\n",
"from litellm import completion"
],
"metadata": { "metadata": {
"id": "VEukLhDzo4vw" "id": "VEukLhDzo4vw"
}, },
"execution_count": 2, "outputs": [],
"outputs": [] "source": [
"import os\n",
"from litellm import completion"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Setup"
],
"metadata": { "metadata": {
"id": "4STYM2OHFNlc" "id": "4STYM2OHFNlc"
} },
"source": [
"## Setup"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 21,
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
],
"metadata": { "metadata": {
"id": "DorpLxw1FHbC" "id": "DorpLxw1FHbC"
}, },
"execution_count": 21, "outputs": [],
"outputs": [] "source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "syF3dTdKFSQQ"
},
"source": [ "source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n", "## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`" "### Pass Your Baseten model `Version ID` as `model`"
], ]
"metadata": {
"id": "syF3dTdKFSQQ"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 18,
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -105,18 +86,16 @@
"id": "rPgSoMlsojz0", "id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050" "outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
}, },
"execution_count": 18,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stderr", "name": "stderr",
"output_type": "stream",
"text": [ "text": [
"\u001b[32mINFO\u001b[0m API key set.\n", "\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n" "INFO:baseten:API key set.\n"
] ]
}, },
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"{'choices': [{'finish_reason': 'stop',\n", "{'choices': [{'finish_reason': 'stop',\n",
@ -127,28 +106,30 @@
" 'model': 'qvv0xeq'}" " 'model': 'qvv0xeq'}"
] ]
}, },
"execution_count": 18,
"metadata": {}, "metadata": {},
"execution_count": 18 "output_type": "execute_result"
} }
],
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "7n21UroEGCGa"
},
"source": [ "source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n", "## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`" "### Pass Your Baseten model `Version ID` as `model`"
], ]
"metadata": {
"id": "7n21UroEGCGa"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 19,
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -156,18 +137,16 @@
"id": "uLVWFH899lAF", "id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d" "outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
}, },
"execution_count": 19,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stderr", "name": "stderr",
"output_type": "stream",
"text": [ "text": [
"\u001b[32mINFO\u001b[0m API key set.\n", "\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n" "INFO:baseten:API key set.\n"
] ]
}, },
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"{'choices': [{'finish_reason': 'stop',\n", "{'choices': [{'finish_reason': 'stop',\n",
@ -178,28 +157,30 @@
" 'model': 'q841o8w'}" " 'model': 'q841o8w'}"
] ]
}, },
"execution_count": 19,
"metadata": {}, "metadata": {},
"execution_count": 19 "output_type": "execute_result"
} }
],
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "6-TFwmPAGPXq"
},
"source": [ "source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n", "## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`" "### Pass Your Baseten model `Version ID` as `model`"
], ]
"metadata": {
"id": "6-TFwmPAGPXq"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 20,
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -207,18 +188,16 @@
"id": "gbeYZOrUE_Bp", "id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a" "outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
}, },
"execution_count": 20,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stderr", "name": "stderr",
"output_type": "stream",
"text": [ "text": [
"\u001b[32mINFO\u001b[0m API key set.\n", "\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n" "INFO:baseten:API key set.\n"
] ]
}, },
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"{'choices': [{'finish_reason': 'stop',\n", "{'choices': [{'finish_reason': 'stop',\n",
@ -229,10 +208,30 @@
" 'model': '31dxrj3'}" " 'model': '31dxrj3'}"
] ]
}, },
"execution_count": 20,
"metadata": {}, "metadata": {},
"execution_count": 20 "output_type": "execute_result"
} }
],
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
] ]
} }
] ],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }

View file

@ -1,21 +1,10 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "5hwntUxTMxEk"
},
"source": [ "source": [
"# Langchain liteLLM Demo Notebook\n", "# Langchain liteLLM Demo Notebook\n",
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n", "## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
@ -30,10 +19,7 @@
"ChatLiteLLM(model=\"command-nightly\")\n", "ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n", "ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```" "```"
], ]
"metadata": {
"id": "5hwntUxTMxEk"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -48,25 +34,39 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 2,
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": { "metadata": {
"id": "MOhRaVnhB-0J" "id": "MOhRaVnhB-0J"
}, },
"execution_count": 2, "outputs": [],
"outputs": [] "source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.schema import HumanMessage"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n", "os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n", "chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
@ -76,30 +76,30 @@
" )\n", " )\n",
"]\n", "]\n",
"chat(messages)" "chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n", "os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n", "chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
@ -109,30 +109,30 @@
" )\n", " )\n",
"]\n", "]\n",
"chat(messages)" "chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n", "os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n", "chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
@ -142,30 +142,30 @@
" )\n", " )\n",
"]\n", "]\n",
"chat(messages)" "chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"os.environ['COHERE_API_KEY'] = \"\"\n", "os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n", "chat = ChatLiteLLM(model=\"command-nightly\")\n",
@ -175,27 +175,21 @@
" )\n", " )\n",
"]\n", "]\n",
"chat(messages)" "chat(messages)"
]
}
], ],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "provenance": []
}, },
"id": "tZxpq5PDDY9Y", "kernelspec": {
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666" "display_name": "Python 3",
"name": "python3"
}, },
"execution_count": 30, "language_info": {
"outputs": [ "name": "python"
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
} }
] },
} "nbformat": 4,
] "nbformat_minor": 0
} }

View file

@@ -43,7 +43,7 @@
 "source": [
     "# set you Vertex AI configs\n",
     "import litellm\n",
-    "from litellm import embedding, completion\n",
+    "from litellm import completion\n",
     "\n",
     "litellm.vertex_project = \"hardy-device-386718\"\n",
     "litellm.vertex_location = \"us-central1\""

View file

@ -1,80 +1,71 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "vnvlwUDZK7VA"
},
"source": [ "source": [
"## Demo Notebook of Function Calling with liteLLM\n", "## Demo Notebook of Function Calling with liteLLM\n",
"- Supported Providers for Function Calling\n", "- Supported Providers for Function Calling\n",
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n", " - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"- In this notebook we use function calling with `litellm.completion()`" "- In this notebook we use function calling with `litellm.completion()`"
], ]
"metadata": {
"id": "vnvlwUDZK7VA"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"## Install liteLLM\n",
"!pip install litellm"
],
"metadata": { "metadata": {
"id": "KrINCwRfLgZV" "id": "KrINCwRfLgZV"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"## Install liteLLM\n",
"!pip install litellm"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 2,
"import os, litellm\n",
"from litellm import completion"
],
"metadata": { "metadata": {
"id": "nK7zR5OgLlh2" "id": "nK7zR5OgLlh2"
}, },
"execution_count": 2, "outputs": [],
"outputs": [] "source": [
"import os\n",
"from litellm import completion"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 27,
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
],
"metadata": { "metadata": {
"id": "dCQlyBxKLqbA" "id": "dCQlyBxKLqbA"
}, },
"execution_count": 27, "outputs": [],
"outputs": [] "source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "gfdGv-FMRCdX"
},
"source": [ "source": [
"## Define Messages, Functions\n", "## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n", "We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n", "\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates" "See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
], ]
"metadata": {
"id": "gfdGv-FMRCdX"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "ERzsP1sfM19C"
},
"outputs": [],
"source": [ "source": [
"messages = [\n", "messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n", " {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
@ -104,28 +95,20 @@
" }\n", " }\n",
" }\n", " }\n",
" ]" " ]"
], ]
"metadata": {
"id": "ERzsP1sfM19C"
},
"execution_count": 25,
"outputs": []
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
],
"metadata": { "metadata": {
"id": "NX6by2VuRPnp" "id": "NX6by2VuRPnp"
} },
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 9,
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -133,11 +116,10 @@
"id": "QVoJ5PtxMlVx", "id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9" "outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
}, },
"execution_count": 9,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"{\n", "{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n", " \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
@ -166,24 +148,25 @@
"}\n" "}\n"
] ]
} }
],
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"id": "Yu0o2saDNLx8"
},
"source": [ "source": [
"## Parse GPT 3.5 Response\n", "## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call" "Read Information about what Function to Call"
], ]
"metadata": {
"id": "Yu0o2saDNLx8"
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 11,
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -191,10 +174,8 @@
"id": "u1DzXLJsNOR5", "id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79" "outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
}, },
"execution_count": 11,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n", "<OpenAIObject at 0x7922c70ce930> JSON: {\n",
@ -203,20 +184,19 @@
"}" "}"
] ]
}, },
"execution_count": 11,
"metadata": {}, "metadata": {},
"execution_count": 11 "output_type": "execute_result"
} }
],
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 20,
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -224,33 +204,35 @@
"id": "tYb96Mh0NhH9", "id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f" "outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
}, },
"execution_count": 20,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"get_current_weather {'location': 'Boston, MA'}\n" "get_current_weather {'location': 'Boston, MA'}\n"
] ]
} }
],
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Call the get_current_weather() function"
],
"metadata": { "metadata": {
"id": "z3tstH_yN3fX" "id": "z3tstH_yN3fX"
} },
"source": [
"## Call the get_current_weather() function"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 24,
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -258,37 +240,33 @@
"id": "TSb8JHhgN5Zc", "id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c" "outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
}, },
"execution_count": 24,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"12F\n" "12F\n"
] ]
} }
],
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"## Send the response from get_current_weather back to the model to summarize"
],
"metadata": { "metadata": {
"id": "k4HGJE3NRmMI" "id": "k4HGJE3NRmMI"
} },
"source": [
"## Send the response from get_current_weather back to the model to summarize"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": 26,
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -296,11 +274,10 @@
"id": "a23cmEwiPaw7", "id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21" "outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
}, },
"execution_count": 26,
"outputs": [ "outputs": [
{ {
"output_type": "stream",
"name": "stdout", "name": "stdout",
"output_type": "stream",
"text": [ "text": [
"{\n", "{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n", " \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
@ -325,7 +302,30 @@
"}\n" "}\n"
] ]
} }
],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
] ]
} }
] ],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }

View file

@@ -1,13 +1,13 @@
 import openai
-api_base = f"http://0.0.0.0:8000"
+api_base = "http://0.0.0.0:8000"
 openai.api_base = api_base
 openai.api_key = "temp-key"
 print(openai.api_base)
-print(f"LiteLLM: response from proxy with streaming")
+print("LiteLLM: response from proxy with streaming")
 response = openai.ChatCompletion.create(
     model="ollama/llama2",
     messages=[

File diff suppressed because one or more lines are too long

View file

@ -1,29 +1,15 @@
{ {
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"source": [ "execution_count": null,
"!pip install litellm"
],
"metadata": { "metadata": {
"id": "j6yJsCGeaq8G" "id": "j6yJsCGeaq8G"
}, },
"execution_count": null, "outputs": [],
"outputs": [] "source": [
"!pip install litellm"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
@@ -33,8 +19,7 @@
 },
 "outputs": [],
 "source": [
-    "import litellm\n",
-    "from litellm import embedding, completion\n",
+    "from litellm import completion\n",
     "\n",
     "model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
     "\n",
@@ -44,9 +29,23 @@
     "for model in model_fallback_list:\n",
     "    try:\n",
     "        response = completion(model=model, messages=messages)\n",
-    "    except Exception as e:\n",
+    "    except Exception:\n",
     "        print(f\"error occurred: {traceback.format_exc()}\")"
 ]
} }
] ],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }

View file

@@ -1,14 +1,12 @@
-import sys, os
-import traceback
+import sys
+import os
 from dotenv import load_dotenv
 load_dotenv()
-import os, io
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest
 from litellm import Router
 import litellm
@@ -137,7 +135,7 @@ for future in futures:
     else:
         failed_calls += 1
-print(f"Load test Summary:")
+print("Load test Summary:")
 print(f"Total Requests: {concurrent_calls}")
 print(f"Successful Calls: {successful_calls}")
 print(f"Failed Calls: {failed_calls}")

View file

@@ -1,14 +1,12 @@
-import sys, os
-import traceback
+import sys
+import os
 from dotenv import load_dotenv
 load_dotenv()
-import os, io
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest
 from litellm import Router
 import litellm
@@ -160,7 +158,7 @@ for future in futures:
     else:
         failed_calls += 1
-print(f"Load test Summary:")
+print("Load test Summary:")
 print(f"Total Requests: {concurrent_calls}")
 print(f"Successful Calls: {successful_calls}")
 print(f"Failed Calls: {failed_calls}")


@ -1,14 +1,12 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
from litellm import Router from litellm import Router
import litellm import litellm
@ -132,7 +130,7 @@ for future in futures:
else: else:
failed_calls += 1 failed_calls += 1
print(f"Load test Summary:") print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}") print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}") print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}") print(f"Failed Calls: {failed_calls}")


@ -1,14 +1,9 @@
from fastapi import FastAPI from fastapi import FastAPI
import uvicorn import uvicorn
from memory_profiler import profile, memory_usage from memory_profiler import profile
import os import os
import traceback
import asyncio
import pytest
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid


@ -1,17 +1,16 @@
#### What this tests #### #### What this tests ####
from memory_profiler import profile, memory_usage from memory_profiler import profile
import sys, os, time import sys
import traceback, asyncio import os
import pytest import time
import asyncio
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid


@ -1,17 +1,16 @@
#### What this tests #### #### What this tests ####
from memory_profiler import profile, memory_usage from memory_profiler import profile
import sys, os, time import sys
import traceback, asyncio import os
import pytest import time
import asyncio
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid


@ -1,17 +1,14 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import asyncio import asyncio
from litellm import Router, Timeout from litellm import Timeout
import time import time
from litellm.caching.caching import Cache
import litellm
import openai import openai
### Test just calling AsyncAzureOpenAI ### Test just calling AsyncAzureOpenAI


@ -1,7 +1,6 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(


@ -1,7 +1,6 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(


@ -1,5 +1,4 @@
import requests import requests
import json
def get_initial_config(): def get_initial_config():


@ -36,7 +36,7 @@ def migrate_models(config_file, proxy_base_url):
litellm_model_name = litellm_params.get("model", "") or "" litellm_model_name = litellm_params.get("model", "") or ""
if "vertex_ai/" in litellm_model_name: if "vertex_ai/" in litellm_model_name:
print(f"\033[91m\nSkipping Vertex AI model\033[0m", model) print("\033[91m\nSkipping Vertex AI model\033[0m", model)
continue continue
for param, value in litellm_params.items(): for param, value in litellm_params.items():


@ -1,7 +1,6 @@
import os import os
from openai import OpenAI from openai import OpenAI
from dotenv import load_dotenv from dotenv import load_dotenv
import httpx
import concurrent.futures import concurrent.futures
load_dotenv() load_dotenv()


@ -2,21 +2,16 @@
import json import json
import boto3 import boto3
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io import io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
import litellm
import io
import json
class TokenIterator: class TokenIterator:
@ -48,7 +43,6 @@ payload = {
"stream": True, "stream": True,
} }
import boto3
client = boto3.client("sagemaker-runtime", region_name="us-west-2") client = boto3.client("sagemaker-runtime", region_name="us-west-2")
response = client.invoke_endpoint_with_response_stream( response = client.invoke_endpoint_with_response_stream(


@ -111,7 +111,6 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"import mlflow\n",
"mlflow.langchain.autolog()" "mlflow.langchain.autolog()"
] ]
}, },


@ -3,7 +3,6 @@ python script to pre-create all views required by LiteLLM Proxy Server
""" """
import asyncio import asyncio
import os
# Enter your DATABASE_URL here # Enter your DATABASE_URL here
@ -33,7 +32,7 @@ async def check_view_exists(): # noqa: PLR0915
# Try to select one row from the view # Try to select one row from the view
await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""")
print("LiteLLM_VerificationTokenView Exists!") # noqa print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e: except Exception:
# If an error occurs, the view does not exist, so create it # If an error occurs, the view does not exist, so create it
await db.execute_raw( await db.execute_raw(
""" """
@ -54,7 +53,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT SELECT
@ -74,7 +73,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""")
print("Last30dKeysBySpend Exists!") # noqa print("Last30dKeysBySpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS
SELECT SELECT
@ -102,7 +101,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""")
print("Last30dModelsBySpend Exists!") # noqa print("Last30dModelsBySpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS
SELECT SELECT
@ -124,7 +123,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""")
print("MonthlyGlobalSpendPerKey Exists!") # noqa print("MonthlyGlobalSpendPerKey Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS
SELECT SELECT
@ -147,7 +146,7 @@ async def check_view_exists(): # noqa: PLR0915
"""SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1""" """SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1"""
) )
print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS
SELECT SELECT
@ -171,7 +170,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""") await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""")
print("DailyTagSpend Exists!") # noqa print("DailyTagSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW DailyTagSpend AS CREATE OR REPLACE VIEW DailyTagSpend AS
SELECT SELECT
@ -189,7 +188,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""")
print("Last30dTopEndUsersSpend Exists!") # noqa print("Last30dTopEndUsersSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE VIEW "Last30dTopEndUsersSpend" AS CREATE VIEW "Last30dTopEndUsersSpend" AS
SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend
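The view-creation script above also loses the "as e" binding on every except block: the exception object was never referenced, so ruff's unused-variable check (most likely F841) flags it and the handlers become bare "except Exception:". An illustrative stand-in for those handlers, not the actual helper from the script:

async def view_exists(db, view_name: str) -> bool:
    # Probe the view with a cheap SELECT; the bound exception name was dropped
    # because nothing in the handler ever used it.
    try:
        await db.query_raw(f'SELECT 1 FROM "{view_name}" LIMIT 1')
        return True
    except Exception:  # previously: except Exception as e:
        return False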


@ -17,7 +17,7 @@ async def log_event(request: Request):
# For now, just printing the received data # For now, just printing the received data
return {"message": "Request received successfully"} return {"message": "Request received successfully"}
except Exception as e: except Exception:
raise HTTPException(status_code=500, detail="Internal Server Error") raise HTTPException(status_code=500, detail="Internal Server Error")


@ -2,12 +2,10 @@
#### What this does #### #### What this does ####
# On success, logs events to Promptlayer # On success, logs events to Promptlayer
import dotenv, os import os
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching.caching import DualCache
from typing import Literal, Union, Optional from typing import Optional
import traceback import traceback
@ -15,10 +13,8 @@ import traceback
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import dotenv, os import litellm
import traceback import uuid
import datetime, subprocess, sys
import litellm, uuid
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger


@ -11,9 +11,9 @@ import os
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union, Any from typing import Optional, Literal, Any
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_guardrail import CustomGuardrail from litellm.integrations.custom_guardrail import CustomGuardrail
from fastapi import HTTPException from fastapi import HTTPException
@ -23,14 +23,10 @@ from litellm.litellm_core_utils.logging_utils import (
convert_litellm_response_object_to_str, convert_litellm_response_object_to_str,
) )
from typing import List from typing import List
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
import httpx
import json import json
from litellm.types.guardrails import GuardrailEventHooks from litellm.types.guardrails import GuardrailEventHooks
@ -147,7 +143,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import ( from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header, add_guardrail_to_applied_guardrails_header,
) )
from litellm.types.guardrails import GuardrailEventHooks
""" """
Use this for the post call moderation with Guardrails Use this for the post call moderation with Guardrails
@ -183,7 +178,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import ( from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header, add_guardrail_to_applied_guardrails_header,
) )
from litellm.types.guardrails import GuardrailEventHooks
event_type: GuardrailEventHooks = GuardrailEventHooks.during_call event_type: GuardrailEventHooks = GuardrailEventHooks.during_call
if self.should_run_guardrail(data=data, event_type=event_type) is not True: if self.should_run_guardrail(data=data, event_type=event_type) is not True:


@ -7,14 +7,13 @@
## Reject a call / response if it contains certain keywords ## Reject a call / response if it contains certain keywords
from typing import Optional, Literal from typing import Literal
import litellm import litellm
from litellm.caching.caching import DualCache from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BannedKeywords(CustomLogger): class _ENTERPRISE_BannedKeywords(CustomLogger):
@ -73,7 +72,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
- check if user id part of call - check if user id part of call
- check if user id part of blocked list - check if user id part of blocked list
""" """
self.print_verbose(f"Inside Banned Keyword List Pre-Call Hook") self.print_verbose("Inside Banned Keyword List Pre-Call Hook")
if call_type == "completion" and "messages" in data: if call_type == "completion" and "messages" in data:
for m in data["messages"]: for m in data["messages"]:
if "content" in m and isinstance(m["content"], str): if "content" in m and isinstance(m["content"], str):


@ -15,7 +15,6 @@ from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BlockedUserList(CustomLogger): class _ENTERPRISE_BlockedUserList(CustomLogger):
@ -69,7 +68,7 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
- check if end-user in cache - check if end-user in cache
- check if end-user in db - check if end-user in db
""" """
self.print_verbose(f"Inside Blocked User List Pre-Call Hook") self.print_verbose("Inside Blocked User List Pre-Call Hook")
if "user_id" in data or "user" in data: if "user_id" in data or "user" in data:
user = data.get("user_id", data.get("user", "")) user = data.get("user_id", data.get("user", ""))
if ( if (


@ -7,21 +7,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
from typing import Optional, Literal, Union from typing import Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
class _ENTERPRISE_GoogleTextModeration(CustomLogger): class _ENTERPRISE_GoogleTextModeration(CustomLogger):


@ -7,28 +7,24 @@
# +-------------------------------------------------------------+ # +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os import sys
import os
from collections.abc import Iterable from collections.abc import Iterable
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union from typing import Optional, Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.types.utils import ( from litellm.types.utils import (
ModelResponse, ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
Choices, Choices,
) )
from datetime import datetime
import aiohttp, asyncio
litellm.set_verbose = True litellm.set_verbose = True


@ -7,26 +7,13 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
## This provides an LLM Guard Integration for content moderation on the proxy ## This provides an LLM Guard Integration for content moderation on the proxy
from typing import Optional, Literal, Union from typing import Optional, Literal
import litellm import litellm
import traceback
import sys
import uuid
import os
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp import aiohttp
import asyncio
from litellm.utils import get_formatted_prompt from litellm.utils import get_formatted_prompt
from litellm.secret_managers.main import get_secret_str from litellm.secret_managers.main import get_secret_str
@ -164,7 +151,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
"moderation", "moderation",
"audio_transcription", "audio_transcription",
] ]
except Exception as e: except Exception:
self.print_verbose( self.print_verbose(
f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']" f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']"
) )


@ -5,27 +5,19 @@
# +-------------------------------------------------------------+ # +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os import sys
import os
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union from typing import Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
litellm.set_verbose = True litellm.set_verbose = True


@ -471,8 +471,6 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
data: dict, data: dict,
call_type: str, # "completion", "embeddings", "image_generation", "moderation" call_type: str, # "completion", "embeddings", "image_generation", "moderation"
): ):
from detect_secrets import SecretsCollection
from detect_secrets.settings import default_settings
if await self.should_run_check(user_api_key_dict) is False: if await self.should_run_check(user_api_key_dict) is False:
return return


@ -1,6 +1,5 @@
# Enterprise Proxy Util Endpoints # Enterprise Proxy Util Endpoints
from typing import Optional, List from typing import Optional, List
from litellm._logging import verbose_logger
from litellm.proxy.proxy_server import PrismaClient, HTTPException from litellm.proxy.proxy_server import PrismaClient, HTTPException
from litellm.llms.custom_httpx.http_handler import HTTPHandler from litellm.llms.custom_httpx.http_handler import HTTPHandler
import collections import collections
@ -116,7 +115,7 @@ async def ui_get_spend_by_tags(
def _forecast_daily_cost(data: list): def _forecast_daily_cost(data: list):
from datetime import datetime, timedelta from datetime import timedelta
if len(data) == 0: if len(data) == 0:
return { return {


@ -1063,9 +1063,9 @@ from .llms.sagemaker.chat.transformation import SagemakerChatConfig
from .llms.ollama_chat import OllamaChatConfig from .llms.ollama_chat import OllamaChatConfig
from .llms.bedrock.chat.invoke_handler import ( from .llms.bedrock.chat.invoke_handler import (
AmazonCohereChatConfig, AmazonCohereChatConfig,
AmazonConverseConfig,
bedrock_tool_name_mappings, bedrock_tool_name_mappings,
) )
from .llms.bedrock.chat.converse_transformation import AmazonConverseConfig
from .llms.bedrock.common_utils import ( from .llms.bedrock.common_utils import (
AmazonTitanConfig, AmazonTitanConfig,
AmazonAI21Config, AmazonAI21Config,


@ -1,7 +1,6 @@
import json import json
import logging import logging
import os import os
import traceback
from datetime import datetime from datetime import datetime
from logging import Formatter from logging import Formatter


@ -12,12 +12,11 @@ import json
# s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
import os import os
from typing import Dict, List, Optional, Union from typing import List, Optional, Union
import redis # type: ignore import redis # type: ignore
import redis.asyncio as async_redis # type: ignore import redis.asyncio as async_redis # type: ignore
import litellm
from litellm import get_secret, get_secret_str from litellm import get_secret, get_secret_str
from ._logging import verbose_logger from ._logging import verbose_logger


@ -1,23 +1,12 @@
# What is this? # What is this?
## Translates OpenAI call to Anthropic `/v1/messages` format ## Translates OpenAI call to Anthropic `/v1/messages` format
import json
import os
import traceback import traceback
import uuid from typing import Any, Optional
from typing import Any, Literal, Optional
import dotenv
import httpx
from pydantic import BaseModel
import litellm import litellm
from litellm import ChatCompletionRequest, verbose_logger from litellm import ChatCompletionRequest, verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.anthropic import ( from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
AnthropicMessagesRequest,
AnthropicResponse,
ContentBlockDelta,
)
from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse


@ -7,12 +7,11 @@ from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union
import httpx import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI from openai import AsyncOpenAI, OpenAI
from openai.types.beta.assistant import Assistant from openai.types.beta.assistant import Assistant
from openai.types.beta.assistant_deleted import AssistantDeleted from openai.types.beta.assistant_deleted import AssistantDeleted
import litellm import litellm
from litellm.llms.azure import assistants
from litellm.types.router import GenericLiteLLMParams from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ( from litellm.utils import (
exception_type, exception_type,


@ -144,7 +144,6 @@ def batch_completion_models(*args, **kwargs):
This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models. This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
It sends requests concurrently and returns the response from the first model that responds. It sends requests concurrently and returns the response from the first model that responds.
""" """
import concurrent
if "model" in kwargs: if "model" in kwargs:
kwargs.pop("model") kwargs.pop("model")
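For context on the function touched above: its docstring describes a first-response-wins fan-out across models. A generic sketch of that pattern (not litellm's actual implementation, just the idea the docstring states):

from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait

def first_response(fns):
    # Run all callables in parallel and return whichever finishes first.
    with ThreadPoolExecutor() as pool:
        futures = [pool.submit(fn) for fn in fns]
        done, _ = wait(futures, return_when=FIRST_COMPLETED)
        return next(iter(done)).result()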


@ -19,24 +19,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union
import httpx import httpx
import litellm import litellm
from litellm import client
from litellm.llms.azure.azure import AzureBatchesAPI from litellm.llms.azure.azure import AzureBatchesAPI
from litellm.llms.openai.openai import OpenAIBatchesAPI from litellm.llms.openai.openai import OpenAIBatchesAPI
from litellm.llms.vertex_ai.batches.handler import ( from litellm.llms.vertex_ai.batches.handler import VertexAIBatchPrediction
VertexAIBatchPrediction, from litellm.secret_managers.main import get_secret_str
) from litellm.types.llms.openai import Batch, CreateBatchRequest, RetrieveBatchRequest
from litellm.secret_managers.main import get_secret, get_secret_str
from litellm.types.llms.openai import (
Batch,
CancelBatchRequest,
CreateBatchRequest,
CreateFileRequest,
FileContentRequest,
FileObject,
FileTypes,
HttpxBinaryResponseContent,
RetrieveBatchRequest,
)
from litellm.types.router import GenericLiteLLMParams from litellm.types.router import GenericLiteLLMParams
from litellm.utils import supports_httpx_timeout from litellm.utils import supports_httpx_timeout


@ -11,7 +11,7 @@ import json
import os import os
import threading import threading
import time import time
from typing import Literal, Optional, Union from typing import Literal, Optional
import litellm import litellm
from litellm.utils import ModelResponse from litellm.utils import ModelResponse


@ -8,16 +8,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import ast import ast
import asyncio
import hashlib import hashlib
import inspect
import io
import json import json
import logging
import time import time
import traceback import traceback
from enum import Enum from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union from typing import Any, Dict, List, Optional, Set, Union
from openai.types.audio.transcription_create_params import TranscriptionCreateParams from openai.types.audio.transcription_create_params import TranscriptionCreateParams
from openai.types.chat.completion_create_params import ( from openai.types.chat.completion_create_params import (
@ -41,7 +37,7 @@ from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache from .base_cache import BaseCache
from .disk_cache import DiskCache from .disk_cache import DiskCache
from .dual_cache import DualCache from .dual_cache import DualCache # noqa
from .in_memory_cache import InMemoryCache from .in_memory_cache import InMemoryCache
from .qdrant_semantic_cache import QdrantSemanticCache from .qdrant_semantic_cache import QdrantSemanticCache
from .redis_cache import RedisCache from .redis_cache import RedisCache
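One import in the caching package is deliberately kept, presumably because the module itself no longer references it directly: DualCache gains a "# noqa" marker so ruff's unused-import check leaves the public re-export in place. A sketch of that pattern for a package __init__.py, with illustrative names (the relative import only works inside a package):

from .dual_cache import DualCache  # noqa: F401  - intentional re-export
__all__ = ["DualCache"]  # an alternative way to tell linters the name is public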


@ -35,13 +35,7 @@ from pydantic import BaseModel
import litellm import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.caching.caching import ( from litellm.caching.caching import S3Cache
Cache,
QdrantSemanticCache,
RedisCache,
RedisSemanticCache,
S3Cache,
)
from litellm.litellm_core_utils.logging_utils import ( from litellm.litellm_core_utils.logging_utils import (
_assemble_complete_response_from_streaming_chunks, _assemble_complete_response_from_streaming_chunks,
) )
@ -550,12 +544,7 @@ class LLMCachingHandler:
Returns: Returns:
Optional[Any]: Optional[Any]:
""" """
from litellm.utils import ( from litellm.utils import convert_to_model_response_object
CustomStreamWrapper,
convert_to_model_response_object,
convert_to_streaming_response,
convert_to_streaming_response_async,
)
if ( if (
call_type == CallTypes.acompletion.value call_type == CallTypes.acompletion.value


@ -1,8 +1,6 @@
import json import json
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import print_verbose
from .base_cache import BaseCache from .base_cache import BaseCache
if TYPE_CHECKING: if TYPE_CHECKING:


@ -12,7 +12,7 @@ import asyncio
import time import time
import traceback import traceback
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any, List, Optional, Tuple from typing import TYPE_CHECKING, Any, List, Optional
import litellm import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger


@ -15,7 +15,6 @@ from typing import Any
import litellm import litellm
from litellm._logging import print_verbose from litellm._logging import print_verbose
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache from .base_cache import BaseCache


@ -13,7 +13,6 @@ import asyncio
import inspect import inspect
import json import json
import time import time
import traceback
from datetime import timedelta from datetime import timedelta
from typing import TYPE_CHECKING, Any, List, Optional, Tuple from typing import TYPE_CHECKING, Any, List, Optional, Tuple
@ -21,8 +20,7 @@ import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.types.caching import RedisPipelineIncrementOperation from litellm.types.caching import RedisPipelineIncrementOperation
from litellm.types.services import ServiceLoggerPayload, ServiceTypes from litellm.types.services import ServiceTypes
from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache from .base_cache import BaseCache
@ -53,7 +51,6 @@ class RedisCache(BaseCache):
startup_nodes: Optional[List] = None, # for redis-cluster startup_nodes: Optional[List] = None, # for redis-cluster
**kwargs, **kwargs,
): ):
import redis
from litellm._service_logger import ServiceLogging from litellm._service_logger import ServiceLogging


@ -32,7 +32,6 @@ class RedisSemanticCache(BaseCache):
**kwargs, **kwargs,
): ):
from redisvl.index import SearchIndex from redisvl.index import SearchIndex
from redisvl.query import VectorQuery
print_verbose( print_verbose(
"redis semantic-cache initializing INDEX - litellm_semantic_cache_index" "redis semantic-cache initializing INDEX - litellm_semantic_cache_index"
@ -141,7 +140,6 @@ class RedisSemanticCache(BaseCache):
def get_cache(self, key, **kwargs): def get_cache(self, key, **kwargs):
print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}") print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery from redisvl.query import VectorQuery
# query # query
@ -253,7 +251,6 @@ class RedisSemanticCache(BaseCache):
async def async_get_cache(self, key, **kwargs): async def async_get_cache(self, key, **kwargs):
print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}") print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery from redisvl.query import VectorQuery
from litellm.proxy.proxy_server import llm_model_list, llm_router from litellm.proxy.proxy_server import llm_model_list, llm_router


@ -12,11 +12,9 @@ Has 4 methods:
import ast import ast
import asyncio import asyncio
import json import json
from typing import Any, Optional from typing import Optional
import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache from .base_cache import BaseCache
@ -103,7 +101,6 @@ class S3Cache(BaseCache):
self.set_cache(key=key, value=value, **kwargs) self.set_cache(key=key, value=value, **kwargs)
def get_cache(self, key, **kwargs): def get_cache(self, key, **kwargs):
import boto3
import botocore import botocore
try: try:


@ -1,7 +1,6 @@
# What is this? # What is this?
## File for 'response_cost' calculation in Logging ## File for 'response_cost' calculation in Logging
import time import time
import traceback
from typing import Any, List, Literal, Optional, Tuple, Union from typing import Any, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel from pydantic import BaseModel
@ -44,14 +43,12 @@ from litellm.llms.openai.cost_calculation import (
cost_per_second as openai_cost_per_second, cost_per_second as openai_cost_per_second,
) )
from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token
from litellm.llms.openai.cost_calculation import cost_router as openai_cost_router
from litellm.llms.together_ai.cost_calculator import get_model_params_and_category from litellm.llms.together_ai.cost_calculator import get_model_params_and_category
from litellm.llms.vertex_ai.image_generation.cost_calculator import ( from litellm.llms.vertex_ai.image_generation.cost_calculator import (
cost_calculator as vertex_ai_image_cost_calculator, cost_calculator as vertex_ai_image_cost_calculator,
) )
from litellm.types.llms.openai import HttpxBinaryResponseContent from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.rerank import RerankResponse from litellm.types.rerank import RerankResponse
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage
from litellm.utils import ( from litellm.utils import (
CallTypes, CallTypes,


@ -14,14 +14,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union, cast
import httpx import httpx
import litellm import litellm
from litellm import client, get_secret_str from litellm import get_secret_str
from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI
from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.llms.vertex_ai.files.handler import ( from litellm.llms.vertex_ai.files.handler import VertexAIFilesHandler
VertexAIFilesHandler,
)
from litellm.types.llms.openai import ( from litellm.types.llms.openai import (
Batch,
CreateFileRequest, CreateFileRequest,
FileContentRequest, FileContentRequest,
FileTypes, FileTypes,


@ -19,10 +19,10 @@ import httpx
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI
from litellm.llms.openai.fine_tuning.handler import OpenAIFineTuningAPI, FineTuningJob, FineTuningJobCreate from litellm.llms.openai.fine_tuning.handler import FineTuningJob, OpenAIFineTuningAPI
from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI
from litellm.secret_managers.main import get_secret_str from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import Hyperparameters from litellm.types.llms.openai import FineTuningJobCreate, Hyperparameters
from litellm.types.router import * from litellm.types.router import *
from litellm.utils import supports_httpx_timeout from litellm.utils import supports_httpx_timeout


@ -6,11 +6,9 @@ Slack alerts are sent every 10s or when events are greater than X events
see custom_batch_logger.py for more details / defaults see custom_batch_logger.py for more details / defaults
""" """
import os from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
from litellm._logging import verbose_logger, verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import AlertType, WebhookEvent
if TYPE_CHECKING: if TYPE_CHECKING:
from .slack_alerting import SlackAlerting as _SlackAlerting from .slack_alerting import SlackAlerting as _SlackAlerting
@ -21,7 +19,6 @@ else:
def squash_payloads(queue): def squash_payloads(queue):
import json
squashed = {} squashed = {}
if len(queue) == 0: if len(queue) == 0:


@ -4,16 +4,10 @@ import asyncio
import datetime import datetime
import os import os
import random import random
import threading
import time import time
import traceback from datetime import timedelta
from datetime import datetime as dt from typing import Any, Dict, List, Literal, Optional, Union
from datetime import timedelta, timezone
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, TypedDict, Union, get_args
import aiohttp
import dotenv
from openai import APIError from openai import APIError
import litellm import litellm
@ -26,22 +20,13 @@ from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.litellm_core_utils.exception_mapping_utils import ( from litellm.litellm_core_utils.exception_mapping_utils import (
_add_key_name_and_team_to_alert, _add_key_name_and_team_to_alert,
) )
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.proxy._types import ( from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
AlertType,
CallInfo,
UserAPIKeyAuth,
VirtualKeyEvent,
WebhookEvent,
)
from litellm.router import Router from litellm.router import Router
from litellm.types.integrations.slack_alerting import * from litellm.types.integrations.slack_alerting import *
from litellm.types.router import LiteLLM_Params
from ..email_templates.templates import * from ..email_templates.templates import *
from .batching_handler import send_to_webhook, squash_payloads from .batching_handler import send_to_webhook, squash_payloads
@ -1261,7 +1246,7 @@ Model Info:
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
""" """
from litellm.proxy.proxy_server import premium_user, prisma_client from litellm.proxy.proxy_server import premium_user
from litellm.proxy.utils import send_email from litellm.proxy.utils import send_email
email_logo_url = os.getenv( email_logo_url = os.getenv(
@ -1370,7 +1355,6 @@ Model Info:
if alert_type not in self.alert_types: if alert_type not in self.alert_types:
return return
import json
from datetime import datetime from datetime import datetime
# Get the current timestamp # Get the current timestamp


@ -5,7 +5,6 @@ Utils used for slack alerting
import asyncio import asyncio
from typing import Dict, List, Optional, Union from typing import Dict, List, Optional, Union
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.proxy._types import AlertType from litellm.proxy._types import AlertType
from litellm.secret_managers.main import get_secret from litellm.secret_managers.main import get_secret


@ -6,14 +6,9 @@ import asyncio
import json import json
import os import os
import random import random
import time
import traceback
import types import types
import uuid from typing import Any, Dict, List, Optional
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union
import dotenv # type: ignore
import httpx import httpx
from pydantic import BaseModel # type: ignore from pydantic import BaseModel # type: ignore
@ -21,11 +16,7 @@ import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_content_from_model_response,
)
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
@ -33,7 +24,6 @@ from litellm.types.integrations.argilla import (
SUPPORTED_PAYLOAD_FIELDS, SUPPORTED_PAYLOAD_FIELDS,
ArgillaCredentialsObject, ArgillaCredentialsObject,
ArgillaItem, ArgillaItem,
ArgillaPayload,
) )
from litellm.types.utils import StandardLoggingPayload from litellm.types.utils import StandardLoggingPayload


@ -5,7 +5,7 @@ this file has Arize ai specific helper functions
""" """
import json import json
from typing import TYPE_CHECKING, Any, Optional, Union from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
@ -30,7 +30,6 @@ class ArizeLogger:
def set_arize_ai_attributes(span: Span, kwargs, response_obj): def set_arize_ai_attributes(span: Span, kwargs, response_obj):
from litellm.integrations._types.open_inference import ( from litellm.integrations._types.open_inference import (
MessageAttributes, MessageAttributes,
MessageContentAttributes,
OpenInferenceSpanKindValues, OpenInferenceSpanKindValues,
SpanAttributes, SpanAttributes,
) )


@ -3,23 +3,8 @@ import json
import os import os
import uuid import uuid
from datetime import datetime, timedelta from datetime import datetime, timedelta
from re import S, T from typing import List, Optional
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Optional,
Tuple,
TypedDict,
Union,
)
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.constants import AZURE_STORAGE_MSFT_VERSION from litellm.constants import AZURE_STORAGE_MSFT_VERSION
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger


@ -2,15 +2,10 @@
## Log success + failure events to Braintrust ## Log success + failure events to Braintrust
import copy import copy
import json
import os import os
import threading
import traceback
import uuid
from datetime import datetime from datetime import datetime
from typing import Literal, Optional from typing import Optional
import dotenv
import httpx import httpx
from pydantic import BaseModel from pydantic import BaseModel
@ -18,12 +13,11 @@ import litellm
from litellm import verbose_logger from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler, HTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.utils import get_formatted_prompt, print_verbose from litellm.utils import print_verbose
global_braintrust_http_handler = get_async_httpx_client( global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback llm_provider=httpxSpecialProvider.LoggingCallback


@ -6,7 +6,7 @@ Use this if you want your logs to be stored in memory and flushed periodically
import asyncio import asyncio
import time import time
from typing import List, Literal, Optional from typing import List, Optional
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger


@ -1,4 +1,4 @@
from typing import List, Literal, Optional from typing import List, Optional
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger


@ -1,18 +1,14 @@
#### What this does #### #### What this does ####
# On success, logs events to Promptlayer # On success, logs events to Promptlayer
import os
import traceback import traceback
from datetime import datetime as datetimeObj
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
import dotenv
from pydantic import BaseModel from pydantic import BaseModel
from litellm.caching.caching import DualCache from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.argilla import ArgillaItem from litellm.types.integrations.argilla import ArgillaItem
from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import ( from litellm.types.utils import (
AdapterCompletionStreamWrapper, AdapterCompletionStreamWrapper,
EmbeddingResponse, EmbeddingResponse,


@ -16,11 +16,10 @@ For batching specific details see CustomBatchLogger class
import asyncio import asyncio
import datetime import datetime
import os import os
import sys
import traceback import traceback
import uuid import uuid
from datetime import datetime as datetimeObj from datetime import datetime as datetimeObj
from typing import Any, Dict, List, Optional, Union from typing import Any, List, Optional, Union
from httpx import Response from httpx import Response
@ -32,7 +31,6 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.datadog import * from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import StandardLoggingPayload from litellm.types.utils import StandardLoggingPayload


@ -8,12 +8,9 @@ API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=examp
import asyncio import asyncio
import os import os
import traceback
import uuid import uuid
from datetime import datetime from datetime import datetime
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional
from httpx import Response
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger


@ -1,14 +1,11 @@
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import datetime
import os import os
import traceback import traceback
import uuid import uuid
from typing import Any from typing import Any
import dotenv
import litellm import litellm


@ -2,7 +2,6 @@
Functions for sending Email Alerts Functions for sending Email Alerts
""" """
import asyncio
import os import os
from typing import List, Optional from typing import List, Optional
@ -20,7 +19,7 @@ async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
) )
if team_id is None: if team_id is None:
return [] return []
from litellm.proxy.proxy_server import premium_user, prisma_client from litellm.proxy.proxy_server import prisma_client
if prisma_client is None: if prisma_client is None:
raise Exception("Not connected to DB!") raise Exception("Not connected to DB!")
@ -72,7 +71,6 @@ async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
Send an Email Alert to All Team Members when the Team Budget is crossed Send an Email Alert to All Team Members when the Team Budget is crossed
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
""" """
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.utils import send_email from litellm.proxy.utils import send_email
_team_id = webhook_event.team_id _team_id = webhook_event.team_id


@ -1,15 +1,12 @@
import os import os
from datetime import datetime
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import httpx
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
_get_httpx_client,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )


@ -1,27 +1,14 @@
import asyncio import asyncio
import json
import os import os
import uuid import uuid
from datetime import datetime from datetime import datetime
from re import S from typing import TYPE_CHECKING, Any, Dict, List, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.proxy._types import CommonProxyErrors
from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload
from litellm.types.integrations.gcs_bucket import * from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import ( from litellm.types.utils import StandardLoggingPayload
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.llms.vertex_ai.vertex_llm_base import VertexBase


@ -1,13 +1,7 @@
import json import json
import os import os
import uuid from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
@ -15,11 +9,7 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.types.integrations.gcs_bucket import * from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import ( from litellm.types.utils import StandardCallbackDynamicParams, StandardLoggingPayload
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
@ -190,9 +180,7 @@ class GCSBucketBase(CustomBatchLogger):
This function is used to get the Vertex instance for the GCS Bucket Logger. This function is used to get the Vertex instance for the GCS Bucket Logger.
It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it. It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it.
""" """
from litellm.llms.vertex_ai.vertex_llm_base import ( from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
VertexBase,
)
_in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials) _in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials)
if _in_memory_key not in self.vertex_instances: if _in_memory_key not in self.vertex_instances:


@ -3,10 +3,7 @@
import os import os
import traceback import traceback
import dotenv
import litellm import litellm
from litellm._logging import verbose_logger
class HeliconeLogger: class HeliconeLogger:


@@ -3,11 +3,9 @@
 import json
 import os
-import traceback
 import uuid
 from typing import Literal, Optional
-import dotenv
 import httpx
 import litellm

View file

@@ -3,7 +3,6 @@
 import copy
 import os
 import traceback
-import types
 from collections.abc import MutableMapping, MutableSequence, MutableSet
 from typing import TYPE_CHECKING, Any, Dict, Optional, cast

View file

@@ -6,11 +6,8 @@ Used to get the LangFuseLogger for a given request
 Handles Key/Team Based Langfuse Logging
 """
-import os
 from typing import TYPE_CHECKING, Any, Dict, Optional
-from packaging.version import Version
 from litellm.litellm_core_utils.litellm_logging import StandardCallbackDynamicParams
 from .langfuse import LangFuseLogger, LangfuseLoggingConfig

View file

@@ -3,14 +3,12 @@
 import asyncio
 import os
 import random
-import time
 import traceback
 import types
 import uuid
 from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional, TypedDict, Union
-import dotenv  # type: ignore
+from typing import Any, Dict, List, Optional
 import httpx
 from pydantic import BaseModel  # type: ignore
@@ -18,7 +16,6 @@ import litellm
 from litellm._logging import verbose_logger
 from litellm.integrations.custom_batch_logger import CustomBatchLogger
 from litellm.llms.custom_httpx.http_handler import (
-    AsyncHTTPHandler,
     get_async_httpx_client,
     httpxSpecialProvider,
 )

View file

@@ -1,9 +1,7 @@
-import traceback
 import json
-from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy._types import SpanAttributes
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any
+from litellm.proxy._types import SpanAttributes
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span

View file

@@ -3,17 +3,12 @@
 import json
 import os
-import traceback
-import uuid
-import dotenv
 import httpx
 import litellm
-from litellm import verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.llms.custom_httpx.http_handler import (
-    AsyncHTTPHandler,
     HTTPHandler,
     get_async_httpx_client,
     httpxSpecialProvider,

View file

@@ -1,7 +1,6 @@
 import os
 from dataclasses import dataclass
 from datetime import datetime
-from functools import wraps
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 import litellm
@@ -10,10 +9,7 @@ from litellm.integrations.custom_logger import CustomLogger
 from litellm.types.services import ServiceLoggerPayload
 from litellm.types.utils import (
     ChatCompletionMessageToolCall,
-    EmbeddingResponse,
     Function,
-    ImageResponse,
-    ModelResponse,
     StandardLoggingPayload,
 )
@@ -139,7 +135,6 @@ class OpenTelemetry(CustomLogger):
         end_time: Optional[Union[datetime, float]] = None,
         event_metadata: Optional[dict] = None,
     ):
-        from datetime import datetime
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
@@ -201,7 +196,6 @@ class OpenTelemetry(CustomLogger):
         end_time: Optional[Union[float, datetime]] = None,
         event_metadata: Optional[dict] = None,
     ):
-        from datetime import datetime
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
@@ -666,7 +660,6 @@ class OpenTelemetry(CustomLogger):
             span.set_attribute(key, primitive_value)
     def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
-        from litellm.proxy._types import SpanAttributes
         kwargs.get("optional_params", {})
         litellm_params = kwargs.get("litellm_params", {}) or {}
@@ -834,7 +827,6 @@ class OpenTelemetry(CustomLogger):
         logging_payload: ManagementEndpointLoggingPayload,
         parent_otel_span: Optional[Span] = None,
     ):
-        from datetime import datetime
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
@@ -889,7 +881,6 @@ class OpenTelemetry(CustomLogger):
         logging_payload: ManagementEndpointLoggingPayload,
         parent_otel_span: Optional[Span] = None,
     ):
-        from datetime import datetime
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

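Several of the hunks above drop a function-local `from datetime import datetime`. These removals are safe because the module already imports `datetime` at the top (visible as a context line in the first hunk), so the name is in scope everywhere below; the inner import only rebinds it locally. A tiny illustration:

from datetime import datetime  # module-level import makes the name available everywhere below

def current_timestamp() -> str:
    # No re-import needed here: the module-level binding is already in scope.
    return datetime.now().isoformat()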
View file

@@ -3,8 +3,6 @@ import os
 import time
 from typing import Dict, Final, List, Optional
-from litellm.types.utils import ModelResponse
 CONFIG_FILE_PATH_DEFAULT: Final[str] = "~/.opik.config"

View file

@@ -1,15 +1,10 @@
 # used for /metrics endpoint on LiteLLM Proxy
 #### What this does ####
 # On success, log events to Prometheus
-import os
-import subprocess
 import sys
-import traceback
-import uuid
-from datetime import date, datetime, timedelta
-from typing import Optional, TypedDict, Union
-import litellm
+from datetime import datetime, timedelta
+from typing import Optional
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth

View file

@@ -2,13 +2,10 @@
 Helper functions to query prometheus API
 """
-import asyncio
-import os
 import time
 from datetime import datetime, timedelta
 from typing import Optional
-import litellm
 from litellm import get_secret
 from litellm._logging import verbose_logger
 from litellm.llms.custom_httpx.http_handler import (

View file

@ -3,15 +3,8 @@
# On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers) # On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers)
import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import List, Optional, Union from typing import List, Optional, Union
import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.types.integrations.prometheus import LATENCY_BUCKETS from litellm.types.integrations.prometheus import LATENCY_BUCKETS
from litellm.types.services import ServiceLoggerPayload, ServiceTypes from litellm.types.services import ServiceLoggerPayload, ServiceTypes

View file

@@ -1,12 +1,6 @@
 #### What this does ####
 # On success + failure, log events to Supabase
-import datetime
-import os
-import subprocess
-import sys
-import traceback
-import uuid
 from typing import Optional
 import litellm

View file

@@ -1,14 +1,11 @@
 #### What this does ####
 # On success + failure, log events to Supabase
-import datetime
 import os
 import subprocess
 import sys
 import traceback
-import dotenv
 import litellm

View file

@@ -1,6 +1,5 @@
 import traceback
-import litellm
 from litellm._logging import verbose_logger
@@ -12,9 +11,7 @@ class TraceloopLogger:
     def __init__(self):
         try:
-            from opentelemetry.sdk.trace.export import ConsoleSpanExporter
             from traceloop.sdk import Traceloop
-            from traceloop.sdk.instruments import Instruments
             from traceloop.sdk.tracing.tracing import TracerWrapper
         except ModuleNotFoundError as e:
             verbose_logger.error(
@@ -39,7 +36,6 @@ class TraceloopLogger:
         level="DEFAULT",
         status_message=None,
     ):
-        from opentelemetry import trace
         from opentelemetry.semconv.ai import SpanAttributes
         from opentelemetry.trace import SpanKind, Status, StatusCode
@@ -78,7 +74,7 @@ class TraceloopLogger:
                 )
             if "top_p" in optional_params:
                 span.set_attribute(
-                    SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
+                    SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
                 )
             if "tools" in optional_params or "functions" in optional_params:
                 span.set_attribute(

View file

@@ -173,16 +173,14 @@ except Exception:
 #### What this does ####
 # On success, logs events to Langfuse
-import os
 import traceback
-from datetime import datetime
 class WeightsBiasesLogger:
     # Class variables or attributes
     def __init__(self):
         try:
-            import wandb
+            pass
         except Exception:
             raise Exception(
                 "\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m"

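In the hunk above, the probe `import wandb` inside the `try` block becomes `pass`, so the `except` branch can no longer fire when the package is missing. If the intent were to keep the availability check while still avoiding an unused-import finding, one option (a sketch of an alternative, not what this commit does) is to probe for the module without binding a name:

import importlib.util

def require_wandb() -> None:
    # Detect the package without importing a name that would otherwise go unused.
    if importlib.util.find_spec("wandb") is None:
        raise Exception(
            "wandb not installed, try running 'pip install wandb' to fix this error"
        )

Another common choice is to keep `import wandb` and mark it with `# noqa: F401` so the linter treats the import as intentional.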
View file

@@ -3,7 +3,6 @@ from typing import Awaitable, Callable, Optional
 import anyio
 import anyio.to_thread
-from anyio import to_thread
 from typing_extensions import ParamSpec, TypeVar
 T_ParamSpec = ParamSpec("T_ParamSpec")

View file

@@ -1,7 +1,6 @@
 # What is this?
 ## Helper utilities
-import os
-from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 import httpx

View file

@@ -1,6 +1,4 @@
 import json
-import os
-import threading
 import traceback
 from typing import Optional
@@ -14,17 +12,14 @@ from ..exceptions import (
     APIError,
     AuthenticationError,
     BadRequestError,
-    BudgetExceededError,
     ContentPolicyViolationError,
     ContextWindowExceededError,
     NotFoundError,
-    OpenAIError,
     PermissionDeniedError,
     RateLimitError,
     ServiceUnavailableError,
     Timeout,
     UnprocessableEntityError,
-    UnsupportedParamsError,
 )

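Trimming names from an import like the one above is safe when the module never references them. The case to watch for is a module that imports names only to re-export them (for example a package `__init__`): pyflakes/ruff treat a name as an intentional re-export when it is listed in `__all__` or imported with a redundant alias, and F401 will not flag it. A hedged sketch (hypothetical module, not taken from this diff):

# mypackage/__init__.py
from .exceptions import BudgetExceededError as BudgetExceededError  # redundant alias marks an intentional re-export

__all__ = ["BudgetExceededError"]  # listing the name in __all__ works as well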
Some files were not shown because too many files have changed in this diff.
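For reference, a rule like this is typically enforced by selecting pyflakes' F401 ("imported but unused") check in ruff; the exact configuration used in this repo is not shown in the excerpt above, but a minimal, illustrative setup might look like:

# pyproject.toml (illustrative)
[tool.ruff.lint]
extend-select = ["F401"]

# one-off cleanup with autofix:
#   ruff check --select F401 --fix .

Running the same check in CI keeps newly added unused imports from creeping back in.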