From e463898949e70e5c29eecab6420734c70ab9bf9d Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 19 Sep 2023 11:43:02 -0700 Subject: [PATCH] add petals cookbook --- cookbook/LiteLLM_Petals.ipynb | 565 ++++++++++++++++++++++++++++++++++ 1 file changed, 565 insertions(+) create mode 100644 cookbook/LiteLLM_Petals.ipynb diff --git a/cookbook/LiteLLM_Petals.ipynb b/cookbook/LiteLLM_Petals.ipynb new file mode 100644 index 0000000000..07eb7c4893 --- /dev/null +++ b/cookbook/LiteLLM_Petals.ipynb @@ -0,0 +1,565 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "2fec5cc400424671a3d517327117d18a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3687c76fe84d464baaf35366b21e83b3", + "IPY_MODEL_c29d4460dbaa441cae110b58e0014151", + "IPY_MODEL_6560449a38bf4a7bacd97ccaacf01c4c" + ], + "layout": "IPY_MODEL_5fbd6ae281984d28ba59ebfd0279eda7" + } + }, + "3687c76fe84d464baaf35366b21e83b3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": 
null, + "layout": "IPY_MODEL_323e30e275434aeea241163e5f1f9031", + "placeholder": "​", + "style": "IPY_MODEL_48f4adec51c94f9da6e4c4564daeff84", + "value": "Loading checkpoint shards: 100%" + } + }, + "c29d4460dbaa441cae110b58e0014151": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2a672981a44b4a7fb30674f97f4c10c6", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d75ae8d22ea74840b4c80c8f386384c4", + "value": 2 + } + }, + "6560449a38bf4a7bacd97ccaacf01c4c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_54c06312ecff4e7588665e8b0cb7118b", + "placeholder": "​", + "style": "IPY_MODEL_300078a9d1a6483fba81a4be63793ff7", + "value": " 2/2 [00:00<00:00, 2.36it/s]" + } + }, + "5fbd6ae281984d28ba59ebfd0279eda7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", 
+ "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "323e30e275434aeea241163e5f1f9031": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "48f4adec51c94f9da6e4c4564daeff84": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2a672981a44b4a7fb30674f97f4c10c6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d75ae8d22ea74840b4c80c8f386384c4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "54c06312ecff4e7588665e8b0cb7118b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "300078a9d1a6483fba81a4be63793ff7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } 
+ } + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Using LiteLLM with Petals" + ], + "metadata": { + "id": "dwGtLi_tvM6N" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bdlgaWQqDpzj" + }, + "outputs": [], + "source": [ + "!pip install litellm # 0.1.702 and upwards" + ] + }, + { + "cell_type": "code", + "source": [ + "# install petals\n", + "!pip install git+https://github.com/bigscience-workshop/petals" + ], + "metadata": { + "id": "5Id2QKwOEH8X" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## petals-team/StableBeluga2" + ], + "metadata": { + "id": "k42fldw3veSN" + } + }, + { + "cell_type": "code", + "source": [ + "from litellm import completion\n", + "\n", + "response = completion(model=\"petals/petals-team/StableBeluga2\", messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}], max_tokens=50)\n", + "\n", + "print(response)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tIHcEHdSDqju", + "outputId": "485dbf54-395c-433a-bbf4-8eb70a9fa624" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`.
This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n", + "Sep 19 18:39:50.634 [\u001b[1m\u001b[34mINFO\u001b[0m] Make sure you follow the LLaMA's terms of use: https://bit.ly/llama2-license for LLaMA 2, https://bit.ly/llama-license for LLaMA 1\n", + "Sep 19 18:39:50.639 [\u001b[1m\u001b[34mINFO\u001b[0m] Using DHT prefix: StableBeluga2-hf\n", + "Sep 19 18:40:13.920 [\u001b[1m\u001b[34mINFO\u001b[0m] Route found: 0:40 via …HfQWVM => 40:80 via …Zj98Se\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{\n", + " \"object\": \"chat.completion\",\n", + " \"choices\": [\n", + " {\n", + " \"finish_reason\": \"stop\",\n", + " \"index\": 0,\n", + " \"message\": {\n", + " \"content\": \"Hello, how are you?\\nI'm doing well, thank you. I'm just getting ready to go to the gym.\\nOh, that's great. I'm trying to get back into a workout routine myself.\\nYeah,\",\n", + " \"role\": \"assistant\",\n", + " \"logprobs\": null\n", + " }\n", + " }\n", + " ],\n", + " \"id\": \"chatcmpl-f09d79b3-c1d1-49b7-b55f-cd8dfa1043bf\",\n", + " \"created\": 1695148897.473613,\n", + " \"model\": \"petals-team/StableBeluga2\",\n", + " \"usage\": {\n", + " \"prompt_tokens\": 6,\n", + " \"completion_tokens\": 45,\n", + " \"total_tokens\": 51\n", + " }\n", + "}\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## huggyllama/llama-65b" + ], + "metadata": { + "id": "J8DubRnHvh_j" + } + }, + { + "cell_type": "code", + "source": [ + "response = completion(model=\"petals/huggyllama/llama-65b\", messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}], temperature=0.2, max_tokens=10)\n", + "\n", + "print(response)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 538, + "referenced_widgets": [ + "2fec5cc400424671a3d517327117d18a", + "3687c76fe84d464baaf35366b21e83b3", + 
"c29d4460dbaa441cae110b58e0014151", + "6560449a38bf4a7bacd97ccaacf01c4c", + "5fbd6ae281984d28ba59ebfd0279eda7", + "323e30e275434aeea241163e5f1f9031", + "48f4adec51c94f9da6e4c4564daeff84", + "2a672981a44b4a7fb30674f97f4c10c6", + "d75ae8d22ea74840b4c80c8f386384c4", + "54c06312ecff4e7588665e8b0cb7118b", + "300078a9d1a6483fba81a4be63793ff7" + ] + }, + "id": "IlTCJwDsNvgF", + "outputId": "2e84d125-d982-48ed-8a92-6ca438a50d0c" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Sep 19 18:41:37.912 [\u001b[1m\u001b[34mINFO\u001b[0m] Make sure you follow the LLaMA's terms of use: https://bit.ly/llama2-license for LLaMA 2, https://bit.ly/llama-license for LLaMA 1\n", + "Sep 19 18:41:37.914 [\u001b[1m\u001b[34mINFO\u001b[0m] Using DHT prefix: llama-65b-hf\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00