diff --git a/cookbook/VLLM_Model_Testing.ipynb b/cookbook/VLLM_Model_Testing.ipynb
deleted file mode 100644
index 6511fa900..000000000
--- a/cookbook/VLLM_Model_Testing.ipynb
+++ /dev/null
@@ -1,444 +0,0 @@
-{
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": [],
-      "machine_shape": "hm",
-      "gpuType": "V100"
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "accelerator": "GPU"
-  },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "source": [
-        "# Set up Environment"
-      ],
-      "metadata": {
-        "id": "vDOm5wfjdFLP"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "!pip install --upgrade litellm"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "Bx6mAA6MHiy_",
-        "outputId": "949ad348-5448-40f8-fdf5-acdf0b0ecc70"
-      },
-      "execution_count": 1,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Collecting litellm\n",
-            "  Downloading litellm-0.1.555-py3-none-any.whl (100 kB)\n",
-            "\u001b[?25l     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/100.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K     \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━\u001b[0m \u001b[32m92.2/100.4 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m100.4/100.4 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: importlib-metadata<7.0.0,>=6.8.0 in /usr/local/lib/python3.10/dist-packages (from litellm) (6.8.0)\n",
-            "Collecting openai<0.28.0,>=0.27.8 (from litellm)\n",
-            "  Downloading openai-0.27.10-py3-none-any.whl (76 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.5/76.5 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hCollecting python-dotenv>=0.2.0 (from litellm)\n",
-            "  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n",
-            "Collecting tiktoken<0.5.0,>=0.4.0 (from litellm)\n",
-            "  Downloading tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m32.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<7.0.0,>=6.8.0->litellm) (3.16.2)\n",
-            "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm) (2.31.0)\n",
-            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm) (4.66.1)\n",
-            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm) (3.8.5)\n",
-            "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<0.5.0,>=0.4.0->litellm) (2023.6.3)\n",
-            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm) (3.2.0)\n",
-            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm) (3.4)\n",
-            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm) (2.0.4)\n",
-            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm) (2023.7.22)\n",
-            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (23.1.0)\n",
-            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (6.0.4)\n",
-            "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (4.0.3)\n",
-            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (1.9.2)\n",
-            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (1.4.0)\n",
-            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (1.3.1)\n",
-            "Installing collected packages: python-dotenv, tiktoken, openai, litellm\n",
-            "Successfully installed litellm-0.1.555 openai-0.27.10 python-dotenv-1.0.0 tiktoken-0.4.0\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 2,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "zIYv7JTyxSxR",
-        "outputId": "53890320-f9fa-4bf4-8362-0f17f52c6ed4"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Successfully installed fastapi-0.103.1 h11-0.14.0 huggingface-hub-0.16.4 ninja-1.11.1 pydantic-1.10.12 ray-2.6.3 safetensors-0.3.3 sentencepiece-0.1.99 starlette-0.27.0 tokenizers-0.13.3 transformers-4.33.1 uvicorn-0.23.2 vllm-0.1.4 xformers-0.0.21\n"
-          ]
-        }
-      ],
-      "source": [
-        "!pip install vllm"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "# Load the Logs"
-      ],
-      "metadata": {
-        "id": "RMcoAni6WKEx"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "import pandas as pd"
-      ],
-      "metadata": {
-        "id": "zchxB8c7WJe5"
-      },
-      "execution_count": 4,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# path of the csv file\n",
-        "file_path = 'Model-prompts-example.csv'\n",
-        "\n",
-        "# load the csv file as a pandas DataFrame\n",
-        "data = pd.read_csv(file_path)\n",
-        "\n",
-        "data.head()"
-      ],
-      "metadata": {
-        "id": "aKcWr015WNPm",
-        "outputId": "6e226773-333f-46a2-9fc8-4f54f309d204",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 81
-        }
-      },
-      "execution_count": 6,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "   Success   Timestamp                              Input  \\\n",
-              "0     True  1694041195  This is the templated query input   \n",
-              "\n",
-              "                                    Output RunId (Wandb Runid)  \\\n",
-              "0  This is the query output from the model            8hlumwuk   \n",
-              "\n",
-              "  Model ID (or Name)  \n",
-              "0   OpenAI/Turbo-3.5  "
-            ],
-            "text/html": [
-              "\n",
-              "  <div id=\"df-cd06d09e-fb43-41b0-938f-37f9d285ae66\" class=\"colab-df-container\">\n",
-              "    <div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>Success</th>\n",
-              "      <th>Timestamp</th>\n",
-              "      <th>Input</th>\n",
-              "      <th>Output</th>\n",
-              "      <th>RunId (Wandb Runid)</th>\n",
-              "      <th>Model ID (or Name)</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>True</td>\n",
-              "      <td>1694041195</td>\n",
-              "      <td>This is the templated query input</td>\n",
-              "      <td>This is the query output from the model</td>\n",
-              "      <td>8hlumwuk</td>\n",
-              "      <td>OpenAI/Turbo-3.5</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "</div>\n",
-              "    <div class=\"colab-df-buttons\">\n",
-              "\n",
-              "  <div class=\"colab-df-container\">\n",
-              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-cd06d09e-fb43-41b0-938f-37f9d285ae66')\"\n",
-              "            title=\"Convert this dataframe to an interactive table.\"\n",
-              "            style=\"display:none;\">\n",
-              "\n",
-              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
-              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
-              "  </svg>\n",
-              "    </button>\n",
-              "\n",
-              "  <style>\n",
-              "    .colab-df-container {\n",
-              "      display:flex;\n",
-              "      gap: 12px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert {\n",
-              "      background-color: #E8F0FE;\n",
-              "      border: none;\n",
-              "      border-radius: 50%;\n",
-              "      cursor: pointer;\n",
-              "      display: none;\n",
-              "      fill: #1967D2;\n",
-              "      height: 32px;\n",
-              "      padding: 0 0 0 0;\n",
-              "      width: 32px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert:hover {\n",
-              "      background-color: #E2EBFA;\n",
-              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
-              "      fill: #174EA6;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-buttons div {\n",
-              "      margin-bottom: 4px;\n",
-              "    }\n",
-              "\n",
-              "    [theme=dark] .colab-df-convert {\n",
-              "      background-color: #3B4455;\n",
-              "      fill: #D2E3FC;\n",
-              "    }\n",
-              "\n",
-              "    [theme=dark] .colab-df-convert:hover {\n",
-              "      background-color: #434B5C;\n",
-              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
-              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
-              "      fill: #FFFFFF;\n",
-              "    }\n",
-              "  </style>\n",
-              "\n",
-              "    <script>\n",
-              "      const buttonEl =\n",
-              "        document.querySelector('#df-cd06d09e-fb43-41b0-938f-37f9d285ae66 button.colab-df-convert');\n",
-              "      buttonEl.style.display =\n",
-              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
-              "\n",
-              "      async function convertToInteractive(key) {\n",
-              "        const element = document.querySelector('#df-cd06d09e-fb43-41b0-938f-37f9d285ae66');\n",
-              "        const dataTable =\n",
-              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
-              "                                                    [key], {});\n",
-              "        if (!dataTable) return;\n",
-              "\n",
-              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
-              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
-              "          + ' to learn more about interactive tables.';\n",
-              "        element.innerHTML = '';\n",
-              "        dataTable['output_type'] = 'display_data';\n",
-              "        await google.colab.output.renderOutput(dataTable, element);\n",
-              "        const docLink = document.createElement('div');\n",
-              "        docLink.innerHTML = docLinkHtml;\n",
-              "        element.appendChild(docLink);\n",
-              "      }\n",
-              "    </script>\n",
-              "  </div>\n",
-              "\n",
-              "\n",
-              "    </div>\n",
-              "  </div>\n"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 6
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "input_texts = data['Input'].values"
-      ],
-      "metadata": {
-        "id": "0DbL-kirWUyn"
-      },
-      "execution_count": 7,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "messages = [[{\"role\": \"user\", \"content\": input_text}] for input_text in input_texts]"
-      ],
-      "metadata": {
-        "id": "cqpAvy8hWXyC"
-      },
-      "execution_count": 8,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "# Running Inference"
-      ],
-      "metadata": {
-        "id": "SugCyom0Xy8U"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "from litellm import batch_completion\n",
-        "model_name = \"facebook/opt-125m\"\n",
-        "provider = \"vllm\"\n",
-        "response_list = batch_completion(\n",
-        "            model=model_name,\n",
-        "            custom_llm_provider=provider, # can easily switch to huggingface, replicate, together ai, sagemaker, etc.\n",
-        "            messages=messages,\n",
-        "            temperature=0.2,\n",
-        "            max_tokens=80,\n",
-        "        )"
-      ],
-      "metadata": {
-        "id": "qpikx3uxHns3"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "response_list"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "QDPikHtwKJJ2",
-        "outputId": "06f47c44-e258-452a-f9db-232a5b6d2810"
-      },
-      "execution_count": 10,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "[<ModelResponse at 0x7e5b87616750> JSON: {\n",
-              "   \"choices\": [\n",
-              "     {\n",
-              "       \"finish_reason\": \"stop\",\n",
-              "       \"index\": 0,\n",
-              "       \"message\": {\n",
-              "         \"content\": \".\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is\",\n",
-              "         \"role\": \"assistant\",\n",
-              "         \"logprobs\": null\n",
-              "       }\n",
-              "     }\n",
-              "   ],\n",
-              "   \"created\": 1694053363.6139505,\n",
-              "   \"model\": \"facebook/opt-125m\",\n",
-              "   \"usage\": {\n",
-              "     \"prompt_tokens\": 9,\n",
-              "     \"completion_tokens\": 80,\n",
-              "     \"total_tokens\": 89\n",
-              "   }\n",
-              " }]"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 10
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "response_values = [response['choices'][0]['message']['content'] for response in response_list]"
-      ],
-      "metadata": {
-        "id": "SYqTcCiJbQDF"
-      },
-      "execution_count": 11,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "response_values"
-      ],
-      "metadata": {
-        "id": "wqs-Oy9FbiPo",
-        "outputId": "16a6a7b7-97c8-4b5b-eff8-09ea5eb5ad06",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        }
-      },
-      "execution_count": 12,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "['.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is']"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 12
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "data[f\"{model_name}_output\"] = response_values"
-      ],
-      "metadata": {
-        "id": "mElNbBehbkrz"
-      },
-      "execution_count": 13,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "data.to_csv('model_responses.csv', index=False)"
-      ],
-      "metadata": {
-        "id": "F06NXssDc45k"
-      },
-      "execution_count": 14,
-      "outputs": []
-    }
-  ]
-}