{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "provenance": [],
   "machine_shape": "hm",
   "gpuType": "V100"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  },
  "accelerator": "GPU"
 },
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Set up Environment"
   ],
   "metadata": {
    "id": "vDOm5wfjdFLP"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel\n",
    "%pip install --upgrade litellm"
   ],
   "metadata": {
    "id": "Bx6mAA6MHiy_"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "zIYv7JTyxSxR",
    "outputId": "53890320-f9fa-4bf4-8362-0f17f52c6ed4"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Successfully installed fastapi-0.103.1 h11-0.14.0 huggingface-hub-0.16.4 ninja-1.11.1 pydantic-1.10.12 ray-2.6.3 safetensors-0.3.3 sentencepiece-0.1.99 starlette-0.27.0 tokenizers-0.13.3 transformers-4.33.1 uvicorn-0.23.2 vllm-0.1.4 xformers-0.0.21\n"
     ]
    }
   ],
   "source": [
    "# the output recorded with this cell shows that vllm 0.1.4 was installed\n",
    "%pip install vllm"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Load the Logs"
   ],
   "metadata": {
    "id": "RMcoAni6WKEx"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd"
   ],
   "metadata": {
    "id": "zchxB8c7WJe5"
   },
   "execution_count": 4,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# path of the csv file\n",
    "file_path = 'Model-prompts-example.csv'\n",
    "\n",
    "# load the csv file as a pandas DataFrame\n",
    "data = pd.read_csv(file_path)\n",
    "\n",
    "# the prompts are read from the 'Input' column further down; fail here with a clear message if it is absent\n",
    "assert 'Input' in data.columns, f\"'Input' column not found in {file_path}\"\n",
    "\n",
    "data.head()"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 81
    },
    "id": "aKcWr015WNPm",
    "outputId": "6e226773-333f-46a2-9fc8-4f54f309d204"
   },
   "execution_count": 6,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       " Success Timestamp Input \\\n",
       "0 True 1694041195 This is the templated query input \n",
       "\n",
       " Output RunId (Wandb Runid) \\\n",
       "0 This is the query output from the model 8hlumwuk \n",
       "\n",
       " Model ID (or Name) 
\n", "0 OpenAI/Turbo-3.5 " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SuccessTimestampInputOutputRunId (Wandb Runid)Model ID (or Name)
0True1694041195This is the templated query inputThis is the query output from the model8hlumwukOpenAI/Turbo-3.5
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 6 } ] },
  {
   "cell_type": "code",
   "source": [
    "# read_csv turns empty cells into NaN; stop here rather than send a non-text prompt to the model\n",
    "assert data['Input'].notna().all(), \"column 'Input' contains missing values\"\n",
    "\n",
    "input_texts = data['Input'].values"
   ],
   "metadata": {
    "id": "0DbL-kirWUyn"
   },
   "execution_count": 7,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# one conversation per logged input, each holding a single user message\n",
    "messages = [[{\"role\": \"user\", \"content\": input_text}] for input_text in input_texts]"
   ],
   "metadata": {
    "id": "cqpAvy8hWXyC"
   },
   "execution_count": 8,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Running Inference"
   ],
   "metadata": {
    "id": "SugCyom0Xy8U"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "from litellm import batch_completion\n",
    "\n",
    "model_name = \"facebook/opt-125m\"\n",
    "provider = \"vllm\"\n",
    "\n",
    "# every entry of `messages` is sent as its own completion request\n",
    "response_list = batch_completion(\n",
    "    model=model_name,\n",
    "    custom_llm_provider=provider, # can easily switch to huggingface, replicate, together ai, sagemaker, etc.\n",
    "    messages=messages,\n",
    "    temperature=0.2,\n",
    "    max_tokens=80,\n",
    ")"
   ],
   "metadata": {
    "id": "qpikx3uxHns3"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# preview only the first few responses so large log files do not flood the notebook\n",
    "response_list[:5]"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "QDPikHtwKJJ2",
    "outputId": "06f47c44-e258-452a-f9db-232a5b6d2810"
   },
   "execution_count": 10,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "[ JSON: {\n",
       " \"choices\": [\n",
       " {\n",
       " \"finish_reason\": \"stop\",\n",
       " \"index\": 0,\n",
       " \"message\": {\n",
       " \"content\": \".\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is\",\n",
       " \"role\": \"assistant\",\n",
       " \"logprobs\": null\n",
       " }\n",
       " }\n",
       " ],\n",
       " \"created\": 1694053363.6139505,\n",
       " \"model\": \"facebook/opt-125m\",\n",
       " \"usage\": {\n",
       " \"prompt_tokens\": 9,\n",
       " \"completion_tokens\": 
80,\n",
       " \"total_tokens\": 89\n",
       " }\n",
       " }]"
      ]
     },
     "metadata": {},
     "execution_count": 10
    }
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# keep only the generated text of the first choice of every response\n",
    "response_values = [response['choices'][0]['message']['content'] for response in response_list]"
   ],
   "metadata": {
    "id": "SYqTcCiJbQDF"
   },
   "execution_count": 11,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# preview only the first few generated texts\n",
    "response_values[:5]"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "wqs-Oy9FbiPo",
    "outputId": "16a6a7b7-97c8-4b5b-eff8-09ea5eb5ad06"
   },
   "execution_count": 12,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "['.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is']"
      ]
     },
     "metadata": {},
     "execution_count": 12
    }
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# store the generations next to the logged inputs, in a column named after the model\n",
    "data[f\"{model_name}_output\"] = response_values"
   ],
   "metadata": {
    "id": "mElNbBehbkrz"
   },
   "execution_count": 13,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# write the logs together with the new output column, without the DataFrame index\n",
    "data.to_csv('model_responses.csv', index=False)"
   ],
   "metadata": {
    "id": "F06NXssDc45k"
   },
   "execution_count": 14,
   "outputs": []
  }
 ]
}