(code quality) run ruff rule to ban unused imports (#7313)

* remove unused imports

* fix AmazonConverseConfig

* fix test

* fix import

* ruff check fixes

* test fixes

* fix testing

* fix imports
Ishaan Jaff 2024-12-19 12:33:42 -08:00 committed by GitHub
parent 5e344497ce
commit c7f14e936a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
347 changed files with 5473 additions and 7207 deletions
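For reference, Ruff's unused-import check is rule F401 (from pyflakes). Below is a minimal sketch of how such a rule can be enabled, assuming a pyproject.toml-based setup — the exact configuration and any per-file exemptions used in this PR are not shown on this page, so treat the option placement as an assumption:

# pyproject.toml — hypothetical sketch, not the repo's actual config
[tool.ruff.lint]
extend-select = ["F401"]            # F401: unused-import

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]            # re-exports in __init__.py are commonly exempted

Running `ruff check --select F401 --fix .` then removes the flagged imports automatically, which is the kind of change reflected in the diffs below.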


@@ -1,6 +1,4 @@
-from locust import HttpUser, task, between, events
-import json
-import time
+from locust import HttpUser, task, between
 
 
 class MyUser(HttpUser):
@@ -10,7 +8,7 @@ class MyUser(HttpUser):
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
+            "Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
             # Include any additional headers you may need for authentication, etc.
         }

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,423 +1,422 @@
[Notebook diff — "LiteLLM - Azure OpenAI + OpenAI Calls". The notebook JSON is renormalized: the top-level "metadata", "nbformat" and "nbformat_minor" keys move below "cells", and cell keys are reordered to cell_type / execution_count / metadata / outputs / source. The substantive change is the removal of unused imports from two code cells:]

 Cell "mnveHO5dfcB0":
-      "import os, litellm"
+      "import os"

 Cell "5OSosWNCfc_2" ("Completion - Quick start"):
-      "import os\n",
       "from litellm import completion\n",

[The markdown cells, the streaming / threading / stress-test / same-thread cells, and the recorded outputs are unchanged.]

File diff suppressed because one or more lines are too long


@@ -1,166 +1,163 @@
[Notebook diff — "LiteLLM Batch Completions Example". Same JSON renormalization (top-level metadata/nbformat moved below "cells", cell keys reordered). Unused imports are removed from the two code cells:]

 "## Import Batch Completion" cell:
-      "import litellm\n",
       "import os\n",
       "from litellm import batch_completion\n",

 "## Calling `litellm.batch_completion`" cell:
-      "import litellm\n",
       "import os\n",
-      "from litellm import batch_completion\n",
       "\n",
       "os.environ['ANTHROPIC_API_KEY'] = \"\"\n",

[The markdown cells and the recorded ModelResponse output are unchanged.]


@@ -1,204 +1,205 @@
[Notebook diff — proxy batch "Create Users + Keys" notebook ("Environment Setup" / "Import Sheet" / "Create Users + Keys"). The only change is in the setup cell, where a combined import is split onto separate lines:]

-      "import httpx, json\n",
+      "import httpx\n",
+      "import json\n",

[The HTTPHandler class, import_sheet(), and create_user()/create_key_with_alias() cells are unchanged.]

File diff suppressed because it is too large


@@ -1,159 +1,157 @@
[Notebook diff — "Using Nemo-Guardrails with LiteLLM Server". The notebook JSON is renormalized (top-level metadata/nbformat moved below "cells", cell keys reordered), and the unused "import openai" line is dropped from both example cells:]

 Bedrock and TogetherAI cells:
-      "import openai\n",
       "from langchain.chat_models import ChatOpenAI\n",

[The docker run instructions, the config.yml example cell, and the markdown cells are unchanged.]


@@ -1,16 +1,12 @@
-import sys, os
-import traceback
 from dotenv import load_dotenv
 load_dotenv()
 import litellm
-from litellm import embedding, completion, completion_cost
 from autoevals.llm import *
 ###################
-import litellm
 # litellm completion call
 question = "which country has the highest population"


@@ -1,11 +1,12 @@
 import traceback
-from flask import Flask, request, jsonify, abort, Response
+from flask import Flask, request, Response
 from flask_cors import CORS
-import traceback
 import litellm
 from util import handle_error
 from litellm import completion
-import os, dotenv, time
+import os
+import dotenv
+import time
 import json
 
 dotenv.load_dotenv()
@@ -20,9 +21,9 @@ verbose = True
 # litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
 
 ######### PROMPT LOGGING ##########
-os.environ[
-    "PROMPTLAYER_API_KEY"
-] = ""  # set your promptlayer key here - https://promptlayer.com/
+os.environ["PROMPTLAYER_API_KEY"] = (
+    ""  # set your promptlayer key here - https://promptlayer.com/
+)
 
 # set callbacks
 litellm.success_callback = ["promptlayer"]
@@ -57,9 +58,9 @@ def api_completion():
     try:
         if "prompt" not in data:
             raise ValueError("data needs to have prompt")
-        data[
-            "model"
-        ] = "togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
+        data["model"] = (
+            "togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
+        )
         # COMPLETION CALL
         system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
         messages = [
@@ -75,7 +76,7 @@ def api_completion():
             "stream" in data and data["stream"] == True
         ):  # use generate_responses to stream responses
             return Response(data_generator(response), mimetype="text/event-stream")
-    except Exception as e:
+    except Exception:
         # call handle_error function
         print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
         ## LOG FAILURE


@@ -1,5 +1,4 @@
 import requests
-from urllib.parse import urlparse, parse_qs
 
 
 def get_next_url(response):


@ -1,238 +1,237 @@
{ {
"nbformat": 4, "cells": [
"nbformat_minor": 0, {
"metadata": { "cell_type": "markdown",
"colab": { "metadata": {
"provenance": [] "id": "gZx-wHJapG5w"
}, },
"kernelspec": { "source": [
"name": "python3", "# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
"display_name": "Python 3" "\n",
}, "* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"language_info": { "* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
"name": "python" "* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
} "\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
]
}, },
"cells": [ {
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4JSRa0QVogPo"
},
"outputs": [],
"source": [
"!pip install litellm==0.1.399\n",
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "VEukLhDzo4vw"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4STYM2OHFNlc"
},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"id": "DorpLxw1FHbC"
},
"outputs": [],
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "syF3dTdKFSQQ"
},
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"outputs": [
{ {
"cell_type": "markdown", "name": "stderr",
"source": [ "output_type": "stream",
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n", "text": [
"\n", "\u001b[32mINFO\u001b[0m API key set.\n",
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n", "INFO:baseten:API key set.\n"
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n", ]
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
"\n",
"\n",
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
"Example call\n",
"```python\n",
"model = \"q841o8w\" # baseten model version ID\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"```"
],
"metadata": {
"id": "gZx-wHJapG5w"
}
}, },
{ {
"cell_type": "code", "data": {
"execution_count": null, "text/plain": [
"metadata": { "{'choices': [{'finish_reason': 'stop',\n",
"id": "4JSRa0QVogPo" " 'index': 0,\n",
}, " 'message': {'role': 'assistant',\n",
"outputs": [], " 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
"source": [ " 'created': 1692135883.699066,\n",
"!pip install litellm==0.1.399\n", " 'model': 'qvv0xeq'}"
"!pip install baseten urllib3"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"import litellm\n",
"from litellm import completion"
],
"metadata": {
"id": "VEukLhDzo4vw"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Setup"
],
"metadata": {
"id": "4STYM2OHFNlc"
}
},
{
"cell_type": "code",
"source": [
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
],
"metadata": {
"id": "DorpLxw1FHbC"
},
"execution_count": 21,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "syF3dTdKFSQQ"
}
},
{
"cell_type": "code",
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rPgSoMlsojz0",
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
" 'created': 1692135883.699066,\n",
" 'model': 'qvv0xeq'}"
]
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "7n21UroEGCGa"
}
},
{
"cell_type": "code",
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "markdown",
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
],
"metadata": {
"id": "6-TFwmPAGPXq"
}
},
{
"cell_type": "code",
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"metadata": {},
"execution_count": 20
}
] ]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
} }
] ],
"source": [
"model = \"qvv0xeq\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7n21UroEGCGa"
},
"source": [
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uLVWFH899lAF",
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
" 'created': 1692135900.2806294,\n",
" 'model': 'q841o8w'}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"q841o8w\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6-TFwmPAGPXq"
},
"source": [
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
"### Pass Your Baseten model `Version ID` as `model`"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gbeYZOrUE_Bp",
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32mINFO\u001b[0m API key set.\n",
"INFO:baseten:API key set.\n"
]
},
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'message': {'role': 'assistant',\n",
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
" 'created': 1692135914.7472186,\n",
" 'model': '31dxrj3'}"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = \"31dxrj3\"\n",
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
"response"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }
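For reference, a minimal standalone sketch of the Baseten calls the notebook above makes. The model version ID, the prompt, and the `BASETEN_API_KEY` environment variable name are assumptions for illustration; substitute your own deployment's version ID.

```python
import os

from litellm import completion

# Assumed environment variable name for the Baseten API key (placeholder value).
os.environ["BASETEN_API_KEY"] = ""

# Illustrative prompt; the notebook defines its own `messages` in an earlier cell.
messages = [{"role": "user", "content": "What is Baseten?"}]

# "qvv0xeq" is a placeholder Baseten model version ID - use your own deployment's ID.
response = completion(model="qvv0xeq", messages=messages, custom_llm_provider="baseten")
print(response["choices"][0]["message"]["content"])
```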

View file

@ -1,201 +1,195 @@
{ {
"nbformat": 4, "cells": [
"nbformat_minor": 0, {
"metadata": { "cell_type": "markdown",
"colab": { "metadata": {
"provenance": [] "id": "5hwntUxTMxEk"
}, },
"kernelspec": { "source": [
"name": "python3", "# Langchain liteLLM Demo Notebook\n",
"display_name": "Python 3" "## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
}, "Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
"language_info": { "\n",
"name": "python" "Call all LLM models using the same I/O interface\n",
} "\n",
"Example usage\n",
"```python\n",
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"ChatLiteLLM(model=\"command-nightly\")\n",
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"```"
]
}, },
"cells": [ {
{ "cell_type": "code",
"cell_type": "markdown", "execution_count": null,
"source": [ "metadata": {
"# Langchain liteLLM Demo Notebook\n", "id": "aPNAUsCvB6Sv"
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n", },
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n", "outputs": [],
"\n", "source": [
"Call all LLM models using the same I/O interface\n", "!pip install litellm langchain"
"\n", ]
"Example usage\n", },
"```python\n", {
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n", "cell_type": "code",
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n", "execution_count": 2,
"ChatLiteLLM(model=\"command-nightly\")\n", "metadata": {
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n", "id": "MOhRaVnhB-0J"
"```" },
], "outputs": [],
"metadata": { "source": [
"id": "5hwntUxTMxEk" "import os\n",
} "from langchain.chat_models import ChatLiteLLM\n",
"from langchain.schema import HumanMessage"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
}, },
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"outputs": [
{ {
"cell_type": "code", "data": {
"execution_count": null, "text/plain": [
"metadata": { "AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
"id": "aPNAUsCvB6Sv"
},
"outputs": [],
"source": [
"!pip install litellm langchain"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
],
"metadata": {
"id": "MOhRaVnhB-0J"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TahkCtlmCD65",
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"execution_count": 30
}
] ]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
} }
] ],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uXNDyU4jChcs",
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "czbDJRKcC7BV",
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tZxpq5PDDY9Y",
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.environ['COHERE_API_KEY'] = \"\"\n",
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
"messages = [\n",
" HumanMessage(\n",
" content=\"what model are you?\"\n",
" )\n",
"]\n",
"chat(messages)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }
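A compact sketch consolidating the `ChatLiteLLM` cells above into a single loop. The API keys are placeholders, and only the providers you actually call need a key; the call shape stays identical across models.

```python
import os

from langchain.chat_models import ChatLiteLLM
from langchain.schema import HumanMessage

# Placeholder keys - fill in only the providers you actually call.
os.environ["OPENAI_API_KEY"] = ""
os.environ["ANTHROPIC_API_KEY"] = ""
os.environ["COHERE_API_KEY"] = ""

messages = [HumanMessage(content="what model are you?")]

# Same I/O interface for every provider; only the model string changes.
for model in ["gpt-3.5-turbo", "claude-2", "command-nightly"]:
    chat = ChatLiteLLM(model=model)
    print(model, "->", chat(messages))
```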

View file

@ -43,7 +43,7 @@
"source": [ "source": [
"# set you Vertex AI configs\n", "# set you Vertex AI configs\n",
"import litellm\n", "import litellm\n",
"from litellm import embedding, completion\n", "from litellm import completion\n",
"\n", "\n",
"litellm.vertex_project = \"hardy-device-386718\"\n", "litellm.vertex_project = \"hardy-device-386718\"\n",
"litellm.vertex_location = \"us-central1\"" "litellm.vertex_location = \"us-central1\""

View file

@ -1,331 +1,331 @@
{ {
"nbformat": 4, "cells": [
"nbformat_minor": 0, {
"metadata": { "cell_type": "markdown",
"colab": { "metadata": {
"provenance": [] "id": "vnvlwUDZK7VA"
}, },
"kernelspec": { "source": [
"name": "python3", "## Demo Notebook of Function Calling with liteLLM\n",
"display_name": "Python 3" "- Supported Providers for Function Calling\n",
}, " - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
"language_info": { "- In this notebook we use function calling with `litellm.completion()`"
"name": "python" ]
}
}, },
"cells": [ {
{ "cell_type": "code",
"cell_type": "markdown", "execution_count": null,
"source": [ "metadata": {
"## Demo Notebook of Function Calling with liteLLM\n", "id": "KrINCwRfLgZV"
"- Supported Providers for Function Calling\n", },
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n", "outputs": [],
"- In this notebook we use function calling with `litellm.completion()`" "source": [
], "## Install liteLLM\n",
"metadata": { "!pip install litellm"
"id": "vnvlwUDZK7VA" ]
} },
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "nK7zR5OgLlh2"
},
"outputs": [],
"source": [
"import os\n",
"from litellm import completion"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"id": "dCQlyBxKLqbA"
},
"outputs": [],
"source": [
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "gfdGv-FMRCdX"
},
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "ERzsP1sfM19C"
},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NX6by2VuRPnp"
},
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
}, },
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"outputs": [
{ {
"cell_type": "code", "name": "stdout",
"source": [ "output_type": "stream",
"## Install liteLLM\n", "text": [
"!pip install litellm" "{\n",
], " \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
"metadata": { " \"object\": \"chat.completion\",\n",
"id": "KrINCwRfLgZV" " \"created\": 1691801223,\n",
}, " \"model\": \"gpt-3.5-turbo-0613\",\n",
"execution_count": null, " \"choices\": [\n",
"outputs": [] " {\n",
}, " \"index\": 0,\n",
{ " \"message\": {\n",
"cell_type": "code", " \"role\": \"assistant\",\n",
"source": [ " \"content\": null,\n",
"import os, litellm\n", " \"function_call\": {\n",
"from litellm import completion" " \"name\": \"get_current_weather\",\n",
], " \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"metadata": { " }\n",
"id": "nK7zR5OgLlh2" " },\n",
}, " \"finish_reason\": \"function_call\"\n",
"execution_count": 2, " }\n",
"outputs": [] " ],\n",
}, " \"usage\": {\n",
{ " \"prompt_tokens\": 82,\n",
"cell_type": "code", " \"completion_tokens\": 18,\n",
"source": [ " \"total_tokens\": 100\n",
"os.environ['OPENAI_API_KEY'] = \"\" #@param" " }\n",
], "}\n"
"metadata": { ]
"id": "dCQlyBxKLqbA"
},
"execution_count": 27,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Define Messages, Functions\n",
"We create a get_current_weather() function and pass that to GPT 3.5\n",
"\n",
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
],
"metadata": {
"id": "gfdGv-FMRCdX"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
"]\n",
"\n",
"def get_current_weather(location):\n",
" if location == \"Boston, MA\":\n",
" return \"The weather is 12F\"\n",
"\n",
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
" },\n",
" \"unit\": {\n",
" \"type\": \"string\",\n",
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
" }\n",
" },\n",
" \"required\": [\"location\"]\n",
" }\n",
" }\n",
" ]"
],
"metadata": {
"id": "ERzsP1sfM19C"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
],
"metadata": {
"id": "NX6by2VuRPnp"
}
},
{
"cell_type": "code",
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QVoJ5PtxMlVx",
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801223,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": null,\n",
" \"function_call\": {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
" }\n",
" },\n",
" \"finish_reason\": \"function_call\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 82,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 100\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
],
"metadata": {
"id": "Yu0o2saDNLx8"
}
},
{
"cell_type": "code",
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Call the get_current_weather() function"
],
"metadata": {
"id": "z3tstH_yN3fX"
}
},
{
"cell_type": "code",
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"execution_count": 24,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"12F\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Send the response from get_current_weather back to the model to summarize"
],
"metadata": {
"id": "k4HGJE3NRmMI"
}
},
{
"cell_type": "code",
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
]
} }
] ],
"source": [
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Yu0o2saDNLx8"
},
"source": [
"## Parse GPT 3.5 Response\n",
"Read Information about what Function to Call"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u1DzXLJsNOR5",
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
},
"outputs": [
{
"data": {
"text/plain": [
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
" \"name\": \"get_current_weather\",\n",
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
"}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
"function_call_data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tYb96Mh0NhH9",
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"get_current_weather {'location': 'Boston, MA'}\n"
]
}
],
"source": [
"import json\n",
"function_name = function_call_data['name']\n",
"function_args = function_call_data['arguments']\n",
"function_args = json.loads(function_args)\n",
"print(function_name, function_args)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "z3tstH_yN3fX"
},
"source": [
"## Call the get_current_weather() function"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TSb8JHhgN5Zc",
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"12F\n"
]
}
],
"source": [
"if function_name == \"get_current_weather\":\n",
" result = get_current_weather(**function_args)\n",
" print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "k4HGJE3NRmMI"
},
"source": [
"## Send the response from get_current_weather back to the model to summarize"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a23cmEwiPaw7",
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1691801963,\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"choices\": [\n",
" {\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
" },\n",
" \"finish_reason\": \"stop\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 109,\n",
" \"completion_tokens\": 12,\n",
" \"total_tokens\": 121\n",
" }\n",
"}\n"
]
}
],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
"]\n",
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
"print(response)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
} }
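The function-calling notebook above walks through four steps: let the model pick a function, parse the `function_call`, execute it locally, then send the result back for a summary. A self-contained sketch of that round trip, assuming an OpenAI key is set; the toy `get_current_weather` mirrors the notebook's version, with a default `unit` added so model-supplied optional arguments don't break the call.

```python
import json
import os

from litellm import completion

os.environ["OPENAI_API_KEY"] = ""  # placeholder

def get_current_weather(location, unit="fahrenheit"):
    # Toy implementation from the notebook; `unit` default absorbs optional args.
    if location == "Boston, MA":
        return "The weather is 12F"
    return "unknown"

functions = [
    {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["location"],
        },
    }
]

messages = [{"role": "user", "content": "What is the weather like in Boston?"}]

# 1. Let the model decide which function to call.
response = completion(model="gpt-3.5-turbo-0613", messages=messages, functions=functions)
function_call = response["choices"][0]["message"]["function_call"]

# 2. Execute the chosen function with the model-supplied JSON arguments.
args = json.loads(function_call["arguments"])
result = get_current_weather(**args)

# 3. Send the function result back so the model can summarize it.
messages += [
    {
        "role": "assistant",
        "content": None,
        "function_call": {
            "name": function_call["name"],
            "arguments": function_call["arguments"],
        },
    },
    {"role": "function", "name": function_call["name"], "content": result},
]
final = completion(model="gpt-3.5-turbo-0613", messages=messages, functions=functions)
print(final["choices"][0]["message"]["content"])
```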

View file

@ -1,13 +1,13 @@
import openai import openai
api_base = f"http://0.0.0.0:8000" api_base = "http://0.0.0.0:8000"
openai.api_base = api_base openai.api_base = api_base
openai.api_key = "temp-key" openai.api_key = "temp-key"
print(openai.api_base) print(openai.api_base)
print(f"LiteLLM: response from proxy with streaming") print("LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create( response = openai.ChatCompletion.create(
model="ollama/llama2", model="ollama/llama2",
messages=[ messages=[

File diff suppressed because one or more lines are too long

View file

@ -1,52 +1,51 @@
{ {
"nbformat": 4, "cells": [
"nbformat_minor": 0, {
"metadata": { "cell_type": "code",
"colab": { "execution_count": null,
"provenance": [] "metadata": {
}, "id": "j6yJsCGeaq8G"
"kernelspec": { },
"name": "python3", "outputs": [],
"display_name": "Python 3" "source": [
}, "!pip install litellm"
"language_info": { ]
"name": "python"
}
}, },
"cells": [ {
{ "cell_type": "code",
"cell_type": "code", "execution_count": null,
"source": [ "metadata": {
"!pip install litellm" "id": "u129iWNPaf72"
], },
"metadata": { "outputs": [],
"id": "j6yJsCGeaq8G" "source": [
}, "from litellm import completion\n",
"execution_count": null, "\n",
"outputs": [] "model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n",
}, "\n",
{ "user_message = \"Hello, how are you?\"\n",
"cell_type": "code", "messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
"execution_count": null, "\n",
"metadata": { "for model in model_fallback_list:\n",
"id": "u129iWNPaf72" " try:\n",
}, " response = completion(model=model, messages=messages)\n",
"outputs": [], " except Exception:\n",
"source": [ " print(f\"error occurred: {traceback.format_exc()}\")"
"import litellm\n", ]
"from litellm import embedding, completion\n", }
"\n", ],
"model_fallback_list = [\"claude-instant-1\", \"gpt-3.5-turbo\", \"chatgpt-test\"]\n", "metadata": {
"\n", "colab": {
"user_message = \"Hello, how are you?\"\n", "provenance": []
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n", },
"\n", "kernelspec": {
"for model in model_fallback_list:\n", "display_name": "Python 3",
" try:\n", "name": "python3"
" response = completion(model=model, messages=messages)\n", },
" except Exception as e:\n", "language_info": {
" print(f\"error occurred: {traceback.format_exc()}\")" "name": "python"
] }
} },
] "nbformat": 4,
"nbformat_minor": 0
} }
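A runnable version of the fallback loop shown in the notebook above. It adds the `import traceback` the `except` branch relies on, and a `break` on the first successful model, which is an assumption about the intended fallback behavior rather than something the original cell does.

```python
import traceback  # needed for traceback.format_exc() in the except branch

from litellm import completion

model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]

user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]

response = None
for model in model_fallback_list:
    try:
        response = completion(model=model, messages=messages)
        break  # assumption: stop at the first model that succeeds
    except Exception:
        print(f"error occurred: {traceback.format_exc()}")

print(response)
```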

View file

@ -1,14 +1,12 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
from litellm import Router from litellm import Router
import litellm import litellm
@ -137,7 +135,7 @@ for future in futures:
else: else:
failed_calls += 1 failed_calls += 1
print(f"Load test Summary:") print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}") print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}") print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}") print(f"Failed Calls: {failed_calls}")

View file

@ -1,14 +1,12 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
from litellm import Router from litellm import Router
import litellm import litellm
@ -160,7 +158,7 @@ for future in futures:
else: else:
failed_calls += 1 failed_calls += 1
print(f"Load test Summary:") print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}") print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}") print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}") print(f"Failed Calls: {failed_calls}")

View file

@ -1,14 +1,12 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
from litellm import Router from litellm import Router
import litellm import litellm
@ -132,7 +130,7 @@ for future in futures:
else: else:
failed_calls += 1 failed_calls += 1
print(f"Load test Summary:") print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}") print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}") print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}") print(f"Failed Calls: {failed_calls}")

View file

@ -1,14 +1,9 @@
from fastapi import FastAPI from fastapi import FastAPI
import uvicorn import uvicorn
from memory_profiler import profile, memory_usage from memory_profiler import profile
import os import os
import traceback
import asyncio
import pytest
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid

View file

@ -1,17 +1,16 @@
#### What this tests #### #### What this tests ####
from memory_profiler import profile, memory_usage from memory_profiler import profile
import sys, os, time import sys
import traceback, asyncio import os
import pytest import time
import asyncio
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid

View file

@ -1,17 +1,16 @@
#### What this tests #### #### What this tests ####
from memory_profiler import profile, memory_usage from memory_profiler import profile
import sys, os, time import sys
import traceback, asyncio import os
import pytest import time
import asyncio
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import litellm import litellm
from litellm import Router from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
import uuid import uuid

View file

@ -1,17 +1,14 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import asyncio import asyncio
from litellm import Router, Timeout from litellm import Timeout
import time import time
from litellm.caching.caching import Cache
import litellm
import openai import openai
### Test just calling AsyncAzureOpenAI ### Test just calling AsyncAzureOpenAI

View file

@ -1,7 +1,6 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(

View file

@ -1,7 +1,6 @@
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
import copy
load_dotenv() load_dotenv()
sys.path.insert( sys.path.insert(

View file

@ -1,5 +1,4 @@
import requests import requests
import json
def get_initial_config(): def get_initial_config():

View file

@ -36,7 +36,7 @@ def migrate_models(config_file, proxy_base_url):
litellm_model_name = litellm_params.get("model", "") or "" litellm_model_name = litellm_params.get("model", "") or ""
if "vertex_ai/" in litellm_model_name: if "vertex_ai/" in litellm_model_name:
print(f"\033[91m\nSkipping Vertex AI model\033[0m", model) print("\033[91m\nSkipping Vertex AI model\033[0m", model)
continue continue
for param, value in litellm_params.items(): for param, value in litellm_params.items():

View file

@ -1,7 +1,6 @@
import os import os
from openai import OpenAI from openai import OpenAI
from dotenv import load_dotenv from dotenv import load_dotenv
import httpx
import concurrent.futures import concurrent.futures
load_dotenv() load_dotenv()

View file

@ -2,21 +2,16 @@
import json import json
import boto3 import boto3
import sys, os import sys
import traceback import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os, io import io
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import pytest
import litellm
import io
import json
class TokenIterator: class TokenIterator:
@ -48,7 +43,6 @@ payload = {
"stream": True, "stream": True,
} }
import boto3
client = boto3.client("sagemaker-runtime", region_name="us-west-2") client = boto3.client("sagemaker-runtime", region_name="us-west-2")
response = client.invoke_endpoint_with_response_stream( response = client.invoke_endpoint_with_response_stream(
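A rough sketch of the streaming SageMaker call the script above sets up. The endpoint name and the payload fields other than `"stream": True` are placeholders that depend on the deployed container; the script's `TokenIterator` additionally buffers and re-assembles these raw chunks into JSON lines, which this sketch skips.

```python
import json

import boto3

client = boto3.client("sagemaker-runtime", region_name="us-west-2")

payload = {
    "inputs": "Tell me a joke.",            # placeholder prompt field
    "parameters": {"max_new_tokens": 256},  # placeholder generation params
    "stream": True,
}

response = client.invoke_endpoint_with_response_stream(
    EndpointName="my-endpoint-name",        # placeholder endpoint name
    Body=json.dumps(payload),
    ContentType="application/json",
)

# The response body is an event stream; each event carries a PayloadPart of raw bytes.
for event in response["Body"]:
    chunk = event.get("PayloadPart", {}).get("Bytes", b"")
    if chunk:
        print(chunk.decode("utf-8"), end="")
```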

View file

@ -111,7 +111,6 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"import mlflow\n",
"mlflow.langchain.autolog()" "mlflow.langchain.autolog()"
] ]
}, },

View file

@ -3,7 +3,6 @@ python script to pre-create all views required by LiteLLM Proxy Server
""" """
import asyncio import asyncio
import os
# Enter your DATABASE_URL here # Enter your DATABASE_URL here
@ -33,7 +32,7 @@ async def check_view_exists(): # noqa: PLR0915
# Try to select one row from the view # Try to select one row from the view
await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1""")
print("LiteLLM_VerificationTokenView Exists!") # noqa print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e: except Exception:
# If an error occurs, the view does not exist, so create it # If an error occurs, the view does not exist, so create it
await db.execute_raw( await db.execute_raw(
""" """
@ -54,7 +53,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT SELECT
@ -74,7 +73,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dKeysBySpend" LIMIT 1""")
print("Last30dKeysBySpend Exists!") # noqa print("Last30dKeysBySpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS CREATE OR REPLACE VIEW "Last30dKeysBySpend" AS
SELECT SELECT
@ -102,7 +101,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dModelsBySpend" LIMIT 1""")
print("Last30dModelsBySpend Exists!") # noqa print("Last30dModelsBySpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS CREATE OR REPLACE VIEW "Last30dModelsBySpend" AS
SELECT SELECT
@ -124,7 +123,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpendPerKey" LIMIT 1""")
print("MonthlyGlobalSpendPerKey Exists!") # noqa print("MonthlyGlobalSpendPerKey Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerKey" AS
SELECT SELECT
@ -147,7 +146,7 @@ async def check_view_exists(): # noqa: PLR0915
"""SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1""" """SELECT 1 FROM "MonthlyGlobalSpendPerUserPerKey" LIMIT 1"""
) )
print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa print("MonthlyGlobalSpendPerUserPerKey Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS CREATE OR REPLACE VIEW "MonthlyGlobalSpendPerUserPerKey" AS
SELECT SELECT
@ -171,7 +170,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""") await db.query_raw("""SELECT 1 FROM DailyTagSpend LIMIT 1""")
print("DailyTagSpend Exists!") # noqa print("DailyTagSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE OR REPLACE VIEW DailyTagSpend AS CREATE OR REPLACE VIEW DailyTagSpend AS
SELECT SELECT
@ -189,7 +188,7 @@ async def check_view_exists(): # noqa: PLR0915
try: try:
await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""") await db.query_raw("""SELECT 1 FROM "Last30dTopEndUsersSpend" LIMIT 1""")
print("Last30dTopEndUsersSpend Exists!") # noqa print("Last30dTopEndUsersSpend Exists!") # noqa
except Exception as e: except Exception:
sql_query = """ sql_query = """
CREATE VIEW "Last30dTopEndUsersSpend" AS CREATE VIEW "Last30dTopEndUsersSpend" AS
SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend SELECT end_user, COUNT(*) AS total_events, SUM(spend) AS total_spend
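The view-creation script above repeats one pattern per view: probe it with a `SELECT`, and create it if the probe raises. A condensed sketch of that pattern, assuming `db` is an already-connected Prisma client and treating the view name and SQL body as placeholders.

```python
async def ensure_view(db, view_name: str, create_sql: str) -> None:
    """Create `view_name` via `create_sql` if it does not already exist."""
    try:
        # Probe the view; this raises if it does not exist yet.
        await db.query_raw(f'SELECT 1 FROM "{view_name}" LIMIT 1')
        print(f"{view_name} Exists!")  # noqa
    except Exception:
        # View is missing (or the probe failed), so (re)create it.
        await db.execute_raw(create_sql)
        print(f"{view_name} created!")  # noqa
```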

View file

@ -17,7 +17,7 @@ async def log_event(request: Request):
# For now, just printing the received data # For now, just printing the received data
return {"message": "Request received successfully"} return {"message": "Request received successfully"}
except Exception as e: except Exception:
raise HTTPException(status_code=500, detail="Internal Server Error") raise HTTPException(status_code=500, detail="Internal Server Error")

View file

@ -2,12 +2,10 @@
#### What this does #### #### What this does ####
# On success, logs events to Promptlayer # On success, logs events to Promptlayer
import dotenv, os import os
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching.caching import DualCache
from typing import Literal, Union, Optional from typing import Optional
import traceback import traceback
@ -15,10 +13,8 @@ import traceback
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import dotenv, os import litellm
import traceback import uuid
import datetime, subprocess, sys
import litellm, uuid
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger

View file

@ -11,9 +11,9 @@ import os
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union, Any from typing import Optional, Literal, Any
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_guardrail import CustomGuardrail from litellm.integrations.custom_guardrail import CustomGuardrail
from fastapi import HTTPException from fastapi import HTTPException
@ -23,14 +23,10 @@ from litellm.litellm_core_utils.logging_utils import (
convert_litellm_response_object_to_str, convert_litellm_response_object_to_str,
) )
from typing import List from typing import List
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
import httpx
import json import json
from litellm.types.guardrails import GuardrailEventHooks from litellm.types.guardrails import GuardrailEventHooks
@ -147,7 +143,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import ( from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header, add_guardrail_to_applied_guardrails_header,
) )
from litellm.types.guardrails import GuardrailEventHooks
""" """
Use this for the post call moderation with Guardrails Use this for the post call moderation with Guardrails
@ -183,7 +178,6 @@ class AporiaGuardrail(CustomGuardrail):
from litellm.proxy.common_utils.callback_utils import ( from litellm.proxy.common_utils.callback_utils import (
add_guardrail_to_applied_guardrails_header, add_guardrail_to_applied_guardrails_header,
) )
from litellm.types.guardrails import GuardrailEventHooks
event_type: GuardrailEventHooks = GuardrailEventHooks.during_call event_type: GuardrailEventHooks = GuardrailEventHooks.during_call
if self.should_run_guardrail(data=data, event_type=event_type) is not True: if self.should_run_guardrail(data=data, event_type=event_type) is not True:

View file

@ -7,14 +7,13 @@
## Reject a call / response if it contains certain keywords ## Reject a call / response if it contains certain keywords
from typing import Optional, Literal from typing import Literal
import litellm import litellm
from litellm.caching.caching import DualCache from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BannedKeywords(CustomLogger): class _ENTERPRISE_BannedKeywords(CustomLogger):
@ -73,7 +72,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
- check if user id part of call - check if user id part of call
- check if user id part of blocked list - check if user id part of blocked list
""" """
self.print_verbose(f"Inside Banned Keyword List Pre-Call Hook") self.print_verbose("Inside Banned Keyword List Pre-Call Hook")
if call_type == "completion" and "messages" in data: if call_type == "completion" and "messages" in data:
for m in data["messages"]: for m in data["messages"]:
if "content" in m and isinstance(m["content"], str): if "content" in m and isinstance(m["content"], str):

View file

@ -15,7 +15,6 @@ from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException from fastapi import HTTPException
import json, traceback
class _ENTERPRISE_BlockedUserList(CustomLogger): class _ENTERPRISE_BlockedUserList(CustomLogger):
@ -69,7 +68,7 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
- check if end-user in cache - check if end-user in cache
- check if end-user in db - check if end-user in db
""" """
self.print_verbose(f"Inside Blocked User List Pre-Call Hook") self.print_verbose("Inside Blocked User List Pre-Call Hook")
if "user_id" in data or "user" in data: if "user_id" in data or "user" in data:
user = data.get("user_id", data.get("user", "")) user = data.get("user_id", data.get("user", ""))
if ( if (

View file

@ -7,21 +7,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
from typing import Optional, Literal, Union from typing import Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
class _ENTERPRISE_GoogleTextModeration(CustomLogger): class _ENTERPRISE_GoogleTextModeration(CustomLogger):

View file

@ -7,28 +7,24 @@
# +-------------------------------------------------------------+ # +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os import sys
import os
from collections.abc import Iterable from collections.abc import Iterable
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union from typing import Optional, Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.types.utils import ( from litellm.types.utils import (
ModelResponse, ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
Choices, Choices,
) )
from datetime import datetime
import aiohttp, asyncio
litellm.set_verbose = True litellm.set_verbose = True

View file

@ -7,26 +7,13 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
## This provides an LLM Guard Integration for content moderation on the proxy ## This provides an LLM Guard Integration for content moderation on the proxy
from typing import Optional, Literal, Union from typing import Optional, Literal
import litellm import litellm
import traceback
import sys
import uuid
import os
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp import aiohttp
import asyncio
from litellm.utils import get_formatted_prompt from litellm.utils import get_formatted_prompt
from litellm.secret_managers.main import get_secret_str from litellm.secret_managers.main import get_secret_str
@ -164,7 +151,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
"moderation", "moderation",
"audio_transcription", "audio_transcription",
] ]
except Exception as e: except Exception:
self.print_verbose( self.print_verbose(
f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']" f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']"
) )

View file

@ -5,27 +5,19 @@
# +-------------------------------------------------------------+ # +-------------------------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import sys, os import sys
import os
sys.path.insert( sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
from typing import Optional, Literal, Union from typing import Literal
import litellm, traceback, sys, uuid import litellm
from litellm.caching.caching import DualCache import sys
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
from litellm._logging import verbose_proxy_logger
litellm.set_verbose = True litellm.set_verbose = True

View file

@ -471,8 +471,6 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
data: dict, data: dict,
call_type: str, # "completion", "embeddings", "image_generation", "moderation" call_type: str, # "completion", "embeddings", "image_generation", "moderation"
): ):
from detect_secrets import SecretsCollection
from detect_secrets.settings import default_settings
if await self.should_run_check(user_api_key_dict) is False: if await self.should_run_check(user_api_key_dict) is False:
return return

View file

@ -1,6 +1,5 @@
# Enterprise Proxy Util Endpoints # Enterprise Proxy Util Endpoints
from typing import Optional, List from typing import Optional, List
from litellm._logging import verbose_logger
from litellm.proxy.proxy_server import PrismaClient, HTTPException from litellm.proxy.proxy_server import PrismaClient, HTTPException
from litellm.llms.custom_httpx.http_handler import HTTPHandler from litellm.llms.custom_httpx.http_handler import HTTPHandler
import collections import collections
@ -116,7 +115,7 @@ async def ui_get_spend_by_tags(
def _forecast_daily_cost(data: list): def _forecast_daily_cost(data: list):
from datetime import datetime, timedelta from datetime import timedelta
if len(data) == 0: if len(data) == 0:
return { return {

View file

@ -1063,9 +1063,9 @@ from .llms.sagemaker.chat.transformation import SagemakerChatConfig
from .llms.ollama_chat import OllamaChatConfig from .llms.ollama_chat import OllamaChatConfig
from .llms.bedrock.chat.invoke_handler import ( from .llms.bedrock.chat.invoke_handler import (
AmazonCohereChatConfig, AmazonCohereChatConfig,
AmazonConverseConfig,
bedrock_tool_name_mappings, bedrock_tool_name_mappings,
) )
from .llms.bedrock.chat.converse_transformation import AmazonConverseConfig
from .llms.bedrock.common_utils import ( from .llms.bedrock.common_utils import (
AmazonTitanConfig, AmazonTitanConfig,
AmazonAI21Config, AmazonAI21Config,

View file

@ -1,7 +1,6 @@
import json import json
import logging import logging
import os import os
import traceback
from datetime import datetime from datetime import datetime
from logging import Formatter from logging import Formatter

View file

@ -12,12 +12,11 @@ import json
# s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
import os import os
from typing import Dict, List, Optional, Union from typing import List, Optional, Union
import redis # type: ignore import redis # type: ignore
import redis.asyncio as async_redis # type: ignore import redis.asyncio as async_redis # type: ignore
import litellm
from litellm import get_secret, get_secret_str from litellm import get_secret, get_secret_str
from ._logging import verbose_logger from ._logging import verbose_logger

View file

@ -1,23 +1,12 @@
# What is this? # What is this?
## Translates OpenAI call to Anthropic `/v1/messages` format ## Translates OpenAI call to Anthropic `/v1/messages` format
import json
import os
import traceback import traceback
import uuid from typing import Any, Optional
from typing import Any, Literal, Optional
import dotenv
import httpx
from pydantic import BaseModel
import litellm import litellm
from litellm import ChatCompletionRequest, verbose_logger from litellm import ChatCompletionRequest, verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.anthropic import ( from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
AnthropicMessagesRequest,
AnthropicResponse,
ContentBlockDelta,
)
from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse

View file

@ -7,12 +7,11 @@ from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union
import httpx import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI from openai import AsyncOpenAI, OpenAI
from openai.types.beta.assistant import Assistant from openai.types.beta.assistant import Assistant
from openai.types.beta.assistant_deleted import AssistantDeleted from openai.types.beta.assistant_deleted import AssistantDeleted
import litellm import litellm
from litellm.llms.azure import assistants
from litellm.types.router import GenericLiteLLMParams from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ( from litellm.utils import (
exception_type, exception_type,

View file

@ -144,7 +144,6 @@ def batch_completion_models(*args, **kwargs):
This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models. This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
It sends requests concurrently and returns the response from the first model that responds. It sends requests concurrently and returns the response from the first model that responds.
""" """
import concurrent
if "model" in kwargs: if "model" in kwargs:
kwargs.pop("model") kwargs.pop("model")
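
The docstring above describes a first-response-wins fan-out across models. A minimal standalone sketch of that pattern (this is not the function's actual body; it only assumes `litellm.completion` is importable and configured):

from concurrent.futures import ThreadPoolExecutor, as_completed

from litellm import completion


def first_response(models, messages):
    # Fan the same request out to every model in parallel.
    pool = ThreadPoolExecutor(max_workers=len(models))
    try:
        futures = [pool.submit(completion, model=m, messages=messages) for m in models]
        # Return whichever model answers first.
        for future in as_completed(futures):
            return future.result()
    finally:
        # Return immediately instead of waiting on the slower models (Python 3.9+).
        pool.shutdown(wait=False, cancel_futures=True)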

View file

@ -19,24 +19,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union
import httpx import httpx
import litellm import litellm
from litellm import client
from litellm.llms.azure.azure import AzureBatchesAPI from litellm.llms.azure.azure import AzureBatchesAPI
from litellm.llms.openai.openai import OpenAIBatchesAPI from litellm.llms.openai.openai import OpenAIBatchesAPI
from litellm.llms.vertex_ai.batches.handler import ( from litellm.llms.vertex_ai.batches.handler import VertexAIBatchPrediction
VertexAIBatchPrediction, from litellm.secret_managers.main import get_secret_str
) from litellm.types.llms.openai import Batch, CreateBatchRequest, RetrieveBatchRequest
from litellm.secret_managers.main import get_secret, get_secret_str
from litellm.types.llms.openai import (
Batch,
CancelBatchRequest,
CreateBatchRequest,
CreateFileRequest,
FileContentRequest,
FileObject,
FileTypes,
HttpxBinaryResponseContent,
RetrieveBatchRequest,
)
from litellm.types.router import GenericLiteLLMParams from litellm.types.router import GenericLiteLLMParams
from litellm.utils import supports_httpx_timeout from litellm.utils import supports_httpx_timeout

View file

@ -11,7 +11,7 @@ import json
import os import os
import threading import threading
import time import time
from typing import Literal, Optional, Union from typing import Literal, Optional
import litellm import litellm
from litellm.utils import ModelResponse from litellm.utils import ModelResponse

View file

@ -8,16 +8,12 @@
# Thank you users! We ❤️ you! - Krrish & Ishaan # Thank you users! We ❤️ you! - Krrish & Ishaan
import ast import ast
import asyncio
import hashlib import hashlib
import inspect
import io
import json import json
import logging
import time import time
import traceback import traceback
from enum import Enum from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union from typing import Any, Dict, List, Optional, Set, Union
from openai.types.audio.transcription_create_params import TranscriptionCreateParams from openai.types.audio.transcription_create_params import TranscriptionCreateParams
from openai.types.chat.completion_create_params import ( from openai.types.chat.completion_create_params import (
@ -41,7 +37,7 @@ from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache from .base_cache import BaseCache
from .disk_cache import DiskCache from .disk_cache import DiskCache
from .dual_cache import DualCache from .dual_cache import DualCache # noqa
from .in_memory_cache import InMemoryCache from .in_memory_cache import InMemoryCache
from .qdrant_semantic_cache import QdrantSemanticCache from .qdrant_semantic_cache import QdrantSemanticCache
from .redis_cache import RedisCache from .redis_cache import RedisCache
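
One hunk above keeps `from .dual_cache import DualCache` and adds a `# noqa` marker instead of deleting it. A sketch of why a re-exported name needs that treatment (a hypothetical package layout, not this repo's exact files):

# caching/__init__.py (hypothetical): DualCache is imported only so callers can
# write `from caching import DualCache`; with no local use, an unused-import
# rule such as ruff's F401 would flag it, hence the suppression comment.
from .dual_cache import DualCache  # noqa: F401

# Listing the name in __all__ also makes the re-export explicit.
__all__ = ["DualCache"]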

View file

@ -35,13 +35,7 @@ from pydantic import BaseModel
import litellm import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.caching.caching import ( from litellm.caching.caching import S3Cache
Cache,
QdrantSemanticCache,
RedisCache,
RedisSemanticCache,
S3Cache,
)
from litellm.litellm_core_utils.logging_utils import ( from litellm.litellm_core_utils.logging_utils import (
_assemble_complete_response_from_streaming_chunks, _assemble_complete_response_from_streaming_chunks,
) )
@ -550,12 +544,7 @@ class LLMCachingHandler:
Returns: Returns:
Optional[Any]: Optional[Any]:
""" """
from litellm.utils import ( from litellm.utils import convert_to_model_response_object
CustomStreamWrapper,
convert_to_model_response_object,
convert_to_streaming_response,
convert_to_streaming_response_async,
)
if ( if (
call_type == CallTypes.acompletion.value call_type == CallTypes.acompletion.value

View file

@ -1,8 +1,6 @@
import json import json
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import print_verbose
from .base_cache import BaseCache from .base_cache import BaseCache
if TYPE_CHECKING: if TYPE_CHECKING:

View file

@ -12,7 +12,7 @@ import asyncio
import time import time
import traceback import traceback
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from typing import TYPE_CHECKING, Any, List, Optional, Tuple from typing import TYPE_CHECKING, Any, List, Optional
import litellm import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger

View file

@ -15,7 +15,6 @@ from typing import Any
import litellm import litellm
from litellm._logging import print_verbose from litellm._logging import print_verbose
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache from .base_cache import BaseCache

View file

@ -13,7 +13,6 @@ import asyncio
import inspect import inspect
import json import json
import time import time
import traceback
from datetime import timedelta from datetime import timedelta
from typing import TYPE_CHECKING, Any, List, Optional, Tuple from typing import TYPE_CHECKING, Any, List, Optional, Tuple
@ -21,8 +20,7 @@ import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.types.caching import RedisPipelineIncrementOperation from litellm.types.caching import RedisPipelineIncrementOperation
from litellm.types.services import ServiceLoggerPayload, ServiceTypes from litellm.types.services import ServiceTypes
from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache from .base_cache import BaseCache
@ -53,7 +51,6 @@ class RedisCache(BaseCache):
startup_nodes: Optional[List] = None, # for redis-cluster startup_nodes: Optional[List] = None, # for redis-cluster
**kwargs, **kwargs,
): ):
import redis
from litellm._service_logger import ServiceLogging from litellm._service_logger import ServiceLogging

View file

@ -32,7 +32,6 @@ class RedisSemanticCache(BaseCache):
**kwargs, **kwargs,
): ):
from redisvl.index import SearchIndex from redisvl.index import SearchIndex
from redisvl.query import VectorQuery
print_verbose( print_verbose(
"redis semantic-cache initializing INDEX - litellm_semantic_cache_index" "redis semantic-cache initializing INDEX - litellm_semantic_cache_index"
@ -141,7 +140,6 @@ class RedisSemanticCache(BaseCache):
def get_cache(self, key, **kwargs): def get_cache(self, key, **kwargs):
print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}") print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery from redisvl.query import VectorQuery
# query # query
@ -253,7 +251,6 @@ class RedisSemanticCache(BaseCache):
async def async_get_cache(self, key, **kwargs): async def async_get_cache(self, key, **kwargs):
print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}") print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
import numpy as np
from redisvl.query import VectorQuery from redisvl.query import VectorQuery
from litellm.proxy.proxy_server import llm_model_list, llm_router from litellm.proxy.proxy_server import llm_model_list, llm_router

View file

@ -12,11 +12,9 @@ Has 4 methods:
import ast import ast
import asyncio import asyncio
import json import json
from typing import Any, Optional from typing import Optional
import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.types.caching import LiteLLMCacheType
from .base_cache import BaseCache from .base_cache import BaseCache
@ -103,7 +101,6 @@ class S3Cache(BaseCache):
self.set_cache(key=key, value=value, **kwargs) self.set_cache(key=key, value=value, **kwargs)
def get_cache(self, key, **kwargs): def get_cache(self, key, **kwargs):
import boto3
import botocore import botocore
try: try:

View file

@ -1,7 +1,6 @@
# What is this? # What is this?
## File for 'response_cost' calculation in Logging ## File for 'response_cost' calculation in Logging
import time import time
import traceback
from typing import Any, List, Literal, Optional, Tuple, Union from typing import Any, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel from pydantic import BaseModel
@ -44,14 +43,12 @@ from litellm.llms.openai.cost_calculation import (
cost_per_second as openai_cost_per_second, cost_per_second as openai_cost_per_second,
) )
from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token
from litellm.llms.openai.cost_calculation import cost_router as openai_cost_router
from litellm.llms.together_ai.cost_calculator import get_model_params_and_category from litellm.llms.together_ai.cost_calculator import get_model_params_and_category
from litellm.llms.vertex_ai.image_generation.cost_calculator import ( from litellm.llms.vertex_ai.image_generation.cost_calculator import (
cost_calculator as vertex_ai_image_cost_calculator, cost_calculator as vertex_ai_image_cost_calculator,
) )
from litellm.types.llms.openai import HttpxBinaryResponseContent from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.rerank import RerankResponse from litellm.types.rerank import RerankResponse
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage from litellm.types.utils import CallTypesLiteral, PassthroughCallTypes, Usage
from litellm.utils import ( from litellm.utils import (
CallTypes, CallTypes,

View file

@ -14,14 +14,11 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union, cast
import httpx import httpx
import litellm import litellm
from litellm import client, get_secret_str from litellm import get_secret_str
from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI from litellm.llms.azure.files.handler import AzureOpenAIFilesAPI
from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI from litellm.llms.openai.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.llms.vertex_ai.files.handler import ( from litellm.llms.vertex_ai.files.handler import VertexAIFilesHandler
VertexAIFilesHandler,
)
from litellm.types.llms.openai import ( from litellm.types.llms.openai import (
Batch,
CreateFileRequest, CreateFileRequest,
FileContentRequest, FileContentRequest,
FileTypes, FileTypes,

View file

@ -19,10 +19,10 @@ import httpx
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI
from litellm.llms.openai.fine_tuning.handler import OpenAIFineTuningAPI, FineTuningJob, FineTuningJobCreate from litellm.llms.openai.fine_tuning.handler import FineTuningJob, OpenAIFineTuningAPI
from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI
from litellm.secret_managers.main import get_secret_str from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import Hyperparameters from litellm.types.llms.openai import FineTuningJobCreate, Hyperparameters
from litellm.types.router import * from litellm.types.router import *
from litellm.utils import supports_httpx_timeout from litellm.utils import supports_httpx_timeout

View file

@ -6,11 +6,9 @@ Slack alerts are sent every 10s or when events are greater than X events
see custom_batch_logger.py for more details / defaults see custom_batch_logger.py for more details / defaults
""" """
import os from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
from litellm._logging import verbose_logger, verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import AlertType, WebhookEvent
if TYPE_CHECKING: if TYPE_CHECKING:
from .slack_alerting import SlackAlerting as _SlackAlerting from .slack_alerting import SlackAlerting as _SlackAlerting
@ -21,7 +19,6 @@ else:
def squash_payloads(queue): def squash_payloads(queue):
import json
squashed = {} squashed = {}
if len(queue) == 0: if len(queue) == 0:
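
The module docstring above describes a flush-every-10s-or-when-the-batch-is-large policy. A minimal sketch of that batching pattern (illustrative names, not the proxy's implementation):

import asyncio


class AlertBatcher:
    def __init__(self, send, flush_interval: float = 10.0, batch_size: int = 100):
        self.send = send                    # async callable that takes a list of events
        self.flush_interval = flush_interval
        self.batch_size = batch_size
        self.queue: list = []

    async def add(self, event):
        self.queue.append(event)
        if len(self.queue) >= self.batch_size:
            await self.flush()              # size threshold reached, flush early

    async def run(self):
        while True:                         # periodic flush loop ("every 10s")
            await asyncio.sleep(self.flush_interval)
            await self.flush()

    async def flush(self):
        if not self.queue:
            return
        batch, self.queue = self.queue, []  # swap the queue out before sending
        await self.send(batch)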

View file

@ -4,16 +4,10 @@ import asyncio
import datetime import datetime
import os import os
import random import random
import threading
import time import time
import traceback from datetime import timedelta
from datetime import datetime as dt from typing import Any, Dict, List, Literal, Optional, Union
from datetime import timedelta, timezone
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set, TypedDict, Union, get_args
import aiohttp
import dotenv
from openai import APIError from openai import APIError
import litellm import litellm
@ -26,22 +20,13 @@ from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.litellm_core_utils.exception_mapping_utils import ( from litellm.litellm_core_utils.exception_mapping_utils import (
_add_key_name_and_team_to_alert, _add_key_name_and_team_to_alert,
) )
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.proxy._types import ( from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
AlertType,
CallInfo,
UserAPIKeyAuth,
VirtualKeyEvent,
WebhookEvent,
)
from litellm.router import Router from litellm.router import Router
from litellm.types.integrations.slack_alerting import * from litellm.types.integrations.slack_alerting import *
from litellm.types.router import LiteLLM_Params
from ..email_templates.templates import * from ..email_templates.templates import *
from .batching_handler import send_to_webhook, squash_payloads from .batching_handler import send_to_webhook, squash_payloads
@ -1261,7 +1246,7 @@ Model Info:
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
""" """
from litellm.proxy.proxy_server import premium_user, prisma_client from litellm.proxy.proxy_server import premium_user
from litellm.proxy.utils import send_email from litellm.proxy.utils import send_email
email_logo_url = os.getenv( email_logo_url = os.getenv(
@ -1370,7 +1355,6 @@ Model Info:
if alert_type not in self.alert_types: if alert_type not in self.alert_types:
return return
import json
from datetime import datetime from datetime import datetime
# Get the current timestamp # Get the current timestamp

View file

@ -5,7 +5,6 @@ Utils used for slack alerting
import asyncio import asyncio
from typing import Dict, List, Optional, Union from typing import Dict, List, Optional, Union
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.proxy._types import AlertType from litellm.proxy._types import AlertType
from litellm.secret_managers.main import get_secret from litellm.secret_managers.main import get_secret

View file

@ -6,14 +6,9 @@ import asyncio
import json import json
import os import os
import random import random
import time
import traceback
import types import types
import uuid from typing import Any, Dict, List, Optional
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union
import dotenv # type: ignore
import httpx import httpx
from pydantic import BaseModel # type: ignore from pydantic import BaseModel # type: ignore
@ -21,11 +16,7 @@ import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_content_from_model_response,
)
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
@ -33,7 +24,6 @@ from litellm.types.integrations.argilla import (
SUPPORTED_PAYLOAD_FIELDS, SUPPORTED_PAYLOAD_FIELDS,
ArgillaCredentialsObject, ArgillaCredentialsObject,
ArgillaItem, ArgillaItem,
ArgillaPayload,
) )
from litellm.types.utils import StandardLoggingPayload from litellm.types.utils import StandardLoggingPayload

View file

@ -5,7 +5,7 @@ this file has Arize ai specific helper functions
""" """
import json import json
from typing import TYPE_CHECKING, Any, Optional, Union from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
@ -30,7 +30,6 @@ class ArizeLogger:
def set_arize_ai_attributes(span: Span, kwargs, response_obj): def set_arize_ai_attributes(span: Span, kwargs, response_obj):
from litellm.integrations._types.open_inference import ( from litellm.integrations._types.open_inference import (
MessageAttributes, MessageAttributes,
MessageContentAttributes,
OpenInferenceSpanKindValues, OpenInferenceSpanKindValues,
SpanAttributes, SpanAttributes,
) )

View file

@ -3,23 +3,8 @@ import json
import os import os
import uuid import uuid
from datetime import datetime, timedelta from datetime import datetime, timedelta
from re import S, T from typing import List, Optional
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Optional,
Tuple,
TypedDict,
Union,
)
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.constants import AZURE_STORAGE_MSFT_VERSION from litellm.constants import AZURE_STORAGE_MSFT_VERSION
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger

View file

@ -2,15 +2,10 @@
## Log success + failure events to Braintrust ## Log success + failure events to Braintrust
import copy import copy
import json
import os import os
import threading
import traceback
import uuid
from datetime import datetime from datetime import datetime
from typing import Literal, Optional from typing import Optional
import dotenv
import httpx import httpx
from pydantic import BaseModel from pydantic import BaseModel
@ -18,12 +13,11 @@ import litellm
from litellm import verbose_logger from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler, HTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.utils import get_formatted_prompt, print_verbose from litellm.utils import print_verbose
global_braintrust_http_handler = get_async_httpx_client( global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback llm_provider=httpxSpecialProvider.LoggingCallback

View file

@ -6,7 +6,7 @@ Use this if you want your logs to be stored in memory and flushed periodically
import asyncio import asyncio
import time import time
from typing import List, Literal, Optional from typing import List, Optional
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger

View file

@ -1,4 +1,4 @@
from typing import List, Literal, Optional from typing import List, Optional
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger

View file

@ -1,18 +1,14 @@
#### What this does #### #### What this does ####
# On success, logs events to Promptlayer # On success, logs events to Promptlayer
import os
import traceback import traceback
from datetime import datetime as datetimeObj
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
import dotenv
from pydantic import BaseModel from pydantic import BaseModel
from litellm.caching.caching import DualCache from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.argilla import ArgillaItem from litellm.types.integrations.argilla import ArgillaItem
from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import ( from litellm.types.utils import (
AdapterCompletionStreamWrapper, AdapterCompletionStreamWrapper,
EmbeddingResponse, EmbeddingResponse,

View file

@ -16,11 +16,10 @@ For batching specific details see CustomBatchLogger class
import asyncio import asyncio
import datetime import datetime
import os import os
import sys
import traceback import traceback
import uuid import uuid
from datetime import datetime as datetimeObj from datetime import datetime as datetimeObj
from typing import Any, Dict, List, Optional, Union from typing import Any, List, Optional, Union
from httpx import Response from httpx import Response
@ -32,7 +31,6 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.datadog import * from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import StandardLoggingPayload from litellm.types.utils import StandardLoggingPayload

View file

@ -8,12 +8,9 @@ API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=examp
import asyncio import asyncio
import os import os
import traceback
import uuid import uuid
from datetime import datetime from datetime import datetime
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional
from httpx import Response
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger

View file

@ -1,14 +1,11 @@
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import datetime
import os import os
import traceback import traceback
import uuid import uuid
from typing import Any from typing import Any
import dotenv
import litellm import litellm

View file

@ -2,7 +2,6 @@
Functions for sending Email Alerts Functions for sending Email Alerts
""" """
import asyncio
import os import os
from typing import List, Optional from typing import List, Optional
@ -20,7 +19,7 @@ async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
) )
if team_id is None: if team_id is None:
return [] return []
from litellm.proxy.proxy_server import premium_user, prisma_client from litellm.proxy.proxy_server import prisma_client
if prisma_client is None: if prisma_client is None:
raise Exception("Not connected to DB!") raise Exception("Not connected to DB!")
@ -72,7 +71,6 @@ async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
Send an Email Alert to All Team Members when the Team Budget is crossed Send an Email Alert to All Team Members when the Team Budget is crossed
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
""" """
from litellm.proxy.proxy_server import premium_user, prisma_client
from litellm.proxy.utils import send_email from litellm.proxy.utils import send_email
_team_id = webhook_event.team_id _team_id = webhook_event.team_id

View file

@ -1,15 +1,12 @@
import os import os
from datetime import datetime
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import httpx
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
_get_httpx_client,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )

View file

@ -1,27 +1,14 @@
import asyncio import asyncio
import json
import os import os
import uuid import uuid
from datetime import datetime from datetime import datetime
from re import S from typing import TYPE_CHECKING, Any, Dict, List, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.proxy._types import CommonProxyErrors
from litellm.proxy._types import CommonProxyErrors, SpendLogsMetadata, SpendLogsPayload
from litellm.types.integrations.gcs_bucket import * from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import ( from litellm.types.utils import StandardLoggingPayload
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.llms.vertex_ai.vertex_llm_base import VertexBase

View file

@ -1,13 +1,7 @@
import json import json
import os import os
import uuid from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypedDict, Union
import httpx
from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
@ -15,11 +9,7 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider, httpxSpecialProvider,
) )
from litellm.types.integrations.gcs_bucket import * from litellm.types.integrations.gcs_bucket import *
from litellm.types.utils import ( from litellm.types.utils import StandardCallbackDynamicParams, StandardLoggingPayload
StandardCallbackDynamicParams,
StandardLoggingMetadata,
StandardLoggingPayload,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
@ -190,9 +180,7 @@ class GCSBucketBase(CustomBatchLogger):
This function is used to get the Vertex instance for the GCS Bucket Logger. This function is used to get the Vertex instance for the GCS Bucket Logger.
It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it. It checks if the Vertex instance is already created and cached, if not it creates a new instance and caches it.
""" """
from litellm.llms.vertex_ai.vertex_llm_base import ( from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
VertexBase,
)
_in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials) _in_memory_key = self._get_in_memory_key_for_vertex_instance(credentials)
if _in_memory_key not in self.vertex_instances: if _in_memory_key not in self.vertex_instances:

View file

@ -3,10 +3,7 @@
import os import os
import traceback import traceback
import dotenv
import litellm import litellm
from litellm._logging import verbose_logger
class HeliconeLogger: class HeliconeLogger:

View file

@ -3,11 +3,9 @@
import json import json
import os import os
import traceback
import uuid import uuid
from typing import Literal, Optional from typing import Literal, Optional
import dotenv
import httpx import httpx
import litellm import litellm

View file

@ -3,7 +3,6 @@
import copy import copy
import os import os
import traceback import traceback
import types
from collections.abc import MutableMapping, MutableSequence, MutableSet from collections.abc import MutableMapping, MutableSequence, MutableSet
from typing import TYPE_CHECKING, Any, Dict, Optional, cast from typing import TYPE_CHECKING, Any, Dict, Optional, cast

View file

@ -6,11 +6,8 @@ Used to get the LangFuseLogger for a given request
Handles Key/Team Based Langfuse Logging Handles Key/Team Based Langfuse Logging
""" """
import os
from typing import TYPE_CHECKING, Any, Dict, Optional from typing import TYPE_CHECKING, Any, Dict, Optional
from packaging.version import Version
from litellm.litellm_core_utils.litellm_logging import StandardCallbackDynamicParams from litellm.litellm_core_utils.litellm_logging import StandardCallbackDynamicParams
from .langfuse import LangFuseLogger, LangfuseLoggingConfig from .langfuse import LangFuseLogger, LangfuseLoggingConfig

View file

@ -3,14 +3,12 @@
import asyncio import asyncio
import os import os
import random import random
import time
import traceback import traceback
import types import types
import uuid import uuid
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, TypedDict, Union from typing import Any, Dict, List, Optional
import dotenv # type: ignore
import httpx import httpx
from pydantic import BaseModel # type: ignore from pydantic import BaseModel # type: ignore
@ -18,7 +16,6 @@ import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,
) )

View file

@ -1,9 +1,7 @@
import traceback
import json import json
from litellm.integrations.custom_logger import CustomLogger from typing import TYPE_CHECKING, Any
from litellm.proxy._types import SpanAttributes
from typing import TYPE_CHECKING, Any, Optional, Union from litellm.proxy._types import SpanAttributes
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span

View file

@ -3,17 +3,12 @@
import json import json
import os import os
import traceback
import uuid
import dotenv
import httpx import httpx
import litellm import litellm
from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler, HTTPHandler,
get_async_httpx_client, get_async_httpx_client,
httpxSpecialProvider, httpxSpecialProvider,

View file

@ -1,7 +1,6 @@
import os import os
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from functools import wraps
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
import litellm import litellm
@ -10,10 +9,7 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm.types.services import ServiceLoggerPayload from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import ( from litellm.types.utils import (
ChatCompletionMessageToolCall, ChatCompletionMessageToolCall,
EmbeddingResponse,
Function, Function,
ImageResponse,
ModelResponse,
StandardLoggingPayload, StandardLoggingPayload,
) )
@ -139,7 +135,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[datetime, float]] = None,
event_metadata: Optional[dict] = None, event_metadata: Optional[dict] = None,
): ):
from datetime import datetime
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -201,7 +196,6 @@ class OpenTelemetry(CustomLogger):
end_time: Optional[Union[float, datetime]] = None, end_time: Optional[Union[float, datetime]] = None,
event_metadata: Optional[dict] = None, event_metadata: Optional[dict] = None,
): ):
from datetime import datetime
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -666,7 +660,6 @@ class OpenTelemetry(CustomLogger):
span.set_attribute(key, primitive_value) span.set_attribute(key, primitive_value)
def set_raw_request_attributes(self, span: Span, kwargs, response_obj): def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
from litellm.proxy._types import SpanAttributes
kwargs.get("optional_params", {}) kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {} litellm_params = kwargs.get("litellm_params", {}) or {}
@ -834,7 +827,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload, logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None, parent_otel_span: Optional[Span] = None,
): ):
from datetime import datetime
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode
@ -889,7 +881,6 @@ class OpenTelemetry(CustomLogger):
logging_payload: ManagementEndpointLoggingPayload, logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None, parent_otel_span: Optional[Span] = None,
): ):
from datetime import datetime
from opentelemetry import trace from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode

View file

@ -3,8 +3,6 @@ import os
import time import time
from typing import Dict, Final, List, Optional from typing import Dict, Final, List, Optional
from litellm.types.utils import ModelResponse
CONFIG_FILE_PATH_DEFAULT: Final[str] = "~/.opik.config" CONFIG_FILE_PATH_DEFAULT: Final[str] = "~/.opik.config"

View file

@ -1,15 +1,10 @@
# used for /metrics endpoint on LiteLLM Proxy # used for /metrics endpoint on LiteLLM Proxy
#### What this does #### #### What this does ####
# On success, log events to Prometheus # On success, log events to Prometheus
import os
import subprocess
import sys import sys
import traceback from datetime import datetime, timedelta
import uuid from typing import Optional
from datetime import date, datetime, timedelta
from typing import Optional, TypedDict, Union
import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth

View file

@ -2,13 +2,10 @@
Helper functions to query prometheus API Helper functions to query prometheus API
""" """
import asyncio
import os
import time import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Optional from typing import Optional
import litellm
from litellm import get_secret from litellm import get_secret
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (

View file

@ -3,15 +3,8 @@
# On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers) # On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers)
import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import List, Optional, Union from typing import List, Optional, Union
import litellm
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from litellm.types.integrations.prometheus import LATENCY_BUCKETS from litellm.types.integrations.prometheus import LATENCY_BUCKETS
from litellm.types.services import ServiceLoggerPayload, ServiceTypes from litellm.types.services import ServiceLoggerPayload, ServiceTypes

View file

@ -1,12 +1,6 @@
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import datetime
import os
import subprocess
import sys
import traceback
import uuid
from typing import Optional from typing import Optional
import litellm import litellm

View file

@ -1,14 +1,11 @@
#### What this does #### #### What this does ####
# On success + failure, log events to Supabase # On success + failure, log events to Supabase
import datetime
import os import os
import subprocess import subprocess
import sys import sys
import traceback import traceback
import dotenv
import litellm import litellm

View file

@ -1,6 +1,5 @@
import traceback import traceback
import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
@ -12,9 +11,7 @@ class TraceloopLogger:
def __init__(self): def __init__(self):
try: try:
from opentelemetry.sdk.trace.export import ConsoleSpanExporter
from traceloop.sdk import Traceloop from traceloop.sdk import Traceloop
from traceloop.sdk.instruments import Instruments
from traceloop.sdk.tracing.tracing import TracerWrapper from traceloop.sdk.tracing.tracing import TracerWrapper
except ModuleNotFoundError as e: except ModuleNotFoundError as e:
verbose_logger.error( verbose_logger.error(
@ -39,7 +36,6 @@ class TraceloopLogger:
level="DEFAULT", level="DEFAULT",
status_message=None, status_message=None,
): ):
from opentelemetry import trace
from opentelemetry.semconv.ai import SpanAttributes from opentelemetry.semconv.ai import SpanAttributes
from opentelemetry.trace import SpanKind, Status, StatusCode from opentelemetry.trace import SpanKind, Status, StatusCode
@ -78,7 +74,7 @@ class TraceloopLogger:
) )
if "top_p" in optional_params: if "top_p" in optional_params:
span.set_attribute( span.set_attribute(
SpanAttributes.LLM_TOP_P, optional_params.get("top_p") SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
) )
if "tools" in optional_params or "functions" in optional_params: if "tools" in optional_params or "functions" in optional_params:
span.set_attribute( span.set_attribute(

View file

@ -173,16 +173,14 @@ except Exception:
#### What this does #### #### What this does ####
# On success, logs events to Langfuse # On success, logs events to Langfuse
import os
import traceback import traceback
from datetime import datetime
class WeightsBiasesLogger: class WeightsBiasesLogger:
# Class variables or attributes # Class variables or attributes
def __init__(self): def __init__(self):
try: try:
import wandb pass
except Exception: except Exception:
raise Exception( raise Exception(
"\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m" "\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m"

View file

@ -3,7 +3,6 @@ from typing import Awaitable, Callable, Optional
import anyio import anyio
import anyio.to_thread import anyio.to_thread
from anyio import to_thread
from typing_extensions import ParamSpec, TypeVar from typing_extensions import ParamSpec, TypeVar
T_ParamSpec = ParamSpec("T_ParamSpec") T_ParamSpec = ParamSpec("T_ParamSpec")

View file

@ -1,7 +1,6 @@
# What is this? # What is this?
## Helper utilities ## Helper utilities
import os from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
import httpx import httpx

View file

@ -1,6 +1,4 @@
import json import json
import os
import threading
import traceback import traceback
from typing import Optional from typing import Optional
@ -14,17 +12,14 @@ from ..exceptions import (
APIError, APIError,
AuthenticationError, AuthenticationError,
BadRequestError, BadRequestError,
BudgetExceededError,
ContentPolicyViolationError, ContentPolicyViolationError,
ContextWindowExceededError, ContextWindowExceededError,
NotFoundError, NotFoundError,
OpenAIError,
PermissionDeniedError, PermissionDeniedError,
RateLimitError, RateLimitError,
ServiceUnavailableError, ServiceUnavailableError,
Timeout, Timeout,
UnprocessableEntityError, UnprocessableEntityError,
UnsupportedParamsError,
) )

Some files were not shown because too many files have changed in this diff.
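
For reference, an unused-import sweep like the one in these hunks can be reproduced locally; a sketch assuming ruff is installed (F401 is its unused-import rule, and --fix applies the removals):

import subprocess

# Check the repo for unused imports and remove them in place.
subprocess.run(["ruff", "check", ".", "--select", "F401", "--fix"], check=True)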