diff --git a/cookbook/VLLM_Model_Testing.ipynb b/cookbook/VLLM_Model_Testing.ipynb deleted file mode 100644 index 6511fa900..000000000 --- a/cookbook/VLLM_Model_Testing.ipynb +++ /dev/null @@ -1,444 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "machine_shape": "hm", - "gpuType": "V100" - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Set up Environment" - ], - "metadata": { - "id": "vDOm5wfjdFLP" - } - }, - { - "cell_type": "code", - "source": [ - "!pip install --upgrade litellm" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Bx6mAA6MHiy_", - "outputId": "949ad348-5448-40f8-fdf5-acdf0b0ecc70" - }, - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting litellm\n", - " Downloading litellm-0.1.555-py3-none-any.whl (100 kB)\n", - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/100.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━\u001b[0m \u001b[32m92.2/100.4 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m100.4/100.4 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: importlib-metadata<7.0.0,>=6.8.0 in /usr/local/lib/python3.10/dist-packages (from litellm) (6.8.0)\n", - "Collecting openai<0.28.0,>=0.27.8 (from litellm)\n", - " Downloading openai-0.27.10-py3-none-any.whl (76 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.5/76.5 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting python-dotenv>=0.2.0 (from litellm)\n", - " Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n", - "Collecting tiktoken<0.5.0,>=0.4.0 (from litellm)\n", - " Downloading tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m32.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<7.0.0,>=6.8.0->litellm) (3.16.2)\n", - "Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm) (2.31.0)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm) (4.66.1)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm) (3.8.5)\n", - "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<0.5.0,>=0.4.0->litellm) (2023.6.3)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm) (2.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm) (2023.7.22)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (23.1.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (4.0.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm) (1.3.1)\n", - "Installing collected packages: python-dotenv, tiktoken, openai, litellm\n", - "Successfully installed litellm-0.1.555 openai-0.27.10 python-dotenv-1.0.0 tiktoken-0.4.0\n" - ] - } - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zIYv7JTyxSxR", - "outputId": "53890320-f9fa-4bf4-8362-0f17f52c6ed4" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Successfully installed fastapi-0.103.1 h11-0.14.0 huggingface-hub-0.16.4 ninja-1.11.1 pydantic-1.10.12 ray-2.6.3 safetensors-0.3.3 sentencepiece-0.1.99 starlette-0.27.0 tokenizers-0.13.3 transformers-4.33.1 uvicorn-0.23.2 vllm-0.1.4 xformers-0.0.21\n" - ] - } - ], - "source": [ - "!pip install vllm" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Load the Logs" - ], - "metadata": { - "id": "RMcoAni6WKEx" - } - }, - { - "cell_type": "code", - "source": [ - "import pandas as pd" - ], - "metadata": { - "id": "zchxB8c7WJe5" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# path of the csv file\n", - "file_path = 'Model-prompts-example.csv'\n", - "\n", - "# load the csv file as a pandas DataFrame\n", - "data = pd.read_csv(file_path)\n", - "\n", - "data.head()" - ], - "metadata": { - "id": "aKcWr015WNPm", - "outputId": "6e226773-333f-46a2-9fc8-4f54f309d204", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 81 - } - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " Success Timestamp Input \\\n", - "0 True 1694041195 This is the templated query input \n", - "\n", - " Output RunId (Wandb Runid) \\\n", - "0 This is the query output from the model 8hlumwuk \n", - "\n", - " Model ID (or Name) \n", - "0 OpenAI/Turbo-3.5 " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SuccessTimestampInputOutputRunId (Wandb Runid)Model ID (or Name)
0True1694041195This is the templated query inputThis is the query output from the model8hlumwukOpenAI/Turbo-3.5
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 6 - } - ] - }, - { - "cell_type": "code", - "source": [ - "input_texts = data['Input'].values" - ], - "metadata": { - "id": "0DbL-kirWUyn" - }, - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "messages = [[{\"role\": \"user\", \"content\": input_text}] for input_text in input_texts]" - ], - "metadata": { - "id": "cqpAvy8hWXyC" - }, - "execution_count": 8, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Running Inference" - ], - "metadata": { - "id": "SugCyom0Xy8U" - } - }, - { - "cell_type": "code", - "source": [ - "from litellm import batch_completion\n", - "model_name = \"facebook/opt-125m\"\n", - "provider = \"vllm\"\n", - "response_list = batch_completion(\n", - " model=model_name,\n", - " custom_llm_provider=provider, # can easily switch to huggingface, replicate, together ai, sagemaker, etc.\n", - " messages=messages,\n", - " temperature=0.2,\n", - " max_tokens=80,\n", - " )" - ], - "metadata": { - "id": "qpikx3uxHns3" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "response_list" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "QDPikHtwKJJ2", - "outputId": "06f47c44-e258-452a-f9db-232a5b6d2810" - }, - "execution_count": 10, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[ JSON: {\n", - " \"choices\": [\n", - " {\n", - " \"finish_reason\": \"stop\",\n", - " \"index\": 0,\n", - " \"message\": {\n", - " \"content\": \".\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is\",\n", - " \"role\": \"assistant\",\n", - " \"logprobs\": null\n", - " }\n", - " }\n", - " ],\n", - " \"created\": 1694053363.6139505,\n", - " \"model\": \"facebook/opt-125m\",\n", - " \"usage\": {\n", - " \"prompt_tokens\": 9,\n", - " \"completion_tokens\": 80,\n", - " \"total_tokens\": 89\n", - " }\n", - " }]" - ] - }, - "metadata": {}, - "execution_count": 10 - } - ] - }, - { - "cell_type": "code", - "source": [ - "response_values = [response['choices'][0]['message']['content'] for response in response_list]" - ], - "metadata": { - "id": "SYqTcCiJbQDF" - }, - "execution_count": 11, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "response_values" - ], - "metadata": { - "id": "wqs-Oy9FbiPo", - "outputId": "16a6a7b7-97c8-4b5b-eff8-09ea5eb5ad06", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 12, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is']" - ] - }, - "metadata": {}, - "execution_count": 12 - } - ] - }, - { - "cell_type": "code", - "source": [ - "data[f\"{model_name}_output\"] = response_values" - ], - "metadata": { - "id": "mElNbBehbkrz" - }, - "execution_count": 13, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "data.to_csv('model_responses.csv', index=False)" - ], - "metadata": { - "id": "F06NXssDc45k" - }, - "execution_count": 14, - "outputs": [] - } - ] -}