From e463898949e70e5c29eecab6420734c70ab9bf9d Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 19 Sep 2023 11:43:02 -0700
Subject: [PATCH] add petals cookbook

---
 cookbook/LiteLLM_Petals.ipynb | 565 ++++++++++++++++++++++++++++++++++
 1 file changed, 565 insertions(+)
 create mode 100644 cookbook/LiteLLM_Petals.ipynb

diff --git a/cookbook/LiteLLM_Petals.ipynb b/cookbook/LiteLLM_Petals.ipynb
new file mode 100644
index 0000000000..07eb7c4893
--- /dev/null
+++ b/cookbook/LiteLLM_Petals.ipynb
@@ -0,0 +1,565 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU",
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "2fec5cc400424671a3d517327117d18a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_3687c76fe84d464baaf35366b21e83b3",
+              "IPY_MODEL_c29d4460dbaa441cae110b58e0014151",
+              "IPY_MODEL_6560449a38bf4a7bacd97ccaacf01c4c"
+            ],
+            "layout": "IPY_MODEL_5fbd6ae281984d28ba59ebfd0279eda7"
+          }
+        },
+        "3687c76fe84d464baaf35366b21e83b3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_323e30e275434aeea241163e5f1f9031",
+            "placeholder": "​",
+            "style": "IPY_MODEL_48f4adec51c94f9da6e4c4564daeff84",
+            "value": "Loading checkpoint shards: 100%"
+          }
+        },
+        "c29d4460dbaa441cae110b58e0014151": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2a672981a44b4a7fb30674f97f4c10c6",
+            "max": 2,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_d75ae8d22ea74840b4c80c8f386384c4",
+            "value": 2
+          }
+        },
+        "6560449a38bf4a7bacd97ccaacf01c4c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_54c06312ecff4e7588665e8b0cb7118b",
+            "placeholder": "​",
+            "style": "IPY_MODEL_300078a9d1a6483fba81a4be63793ff7",
+            "value": " 2/2 [00:00&lt;00:00,  2.36it/s]"
+          }
+        },
+        "5fbd6ae281984d28ba59ebfd0279eda7": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "323e30e275434aeea241163e5f1f9031": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "48f4adec51c94f9da6e4c4564daeff84": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "2a672981a44b4a7fb30674f97f4c10c6": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d75ae8d22ea74840b4c80c8f386384c4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "54c06312ecff4e7588665e8b0cb7118b": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "300078a9d1a6483fba81a4be63793ff7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Using LiteLLM with Petals"
+      ],
+      "metadata": {
+        "id": "dwGtLi_tvM6N"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "bdlgaWQqDpzj"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install litellm # requires litellm version 0.1.702 or later"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Install Petals directly from its GitHub repository\n",
+        "!pip install git+https://github.com/bigscience-workshop/petals"
+      ],
+      "metadata": {
+        "id": "5Id2QKwOEH8X"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## petals-team/StableBeluga2"
+      ],
+      "metadata": {
+        "id": "k42fldw3veSN"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from litellm import completion\n",
+        "\n",
+        "response = completion(model=\"petals/petals-team/StableBeluga2\", messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}], max_tokens=50)\n",
+        "\n",
+        "print(response)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "tIHcEHdSDqju",
+        "outputId": "485dbf54-395c-433a-bbf4-8eb70a9fa624"
+      },
+      "execution_count": 3,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n",
+            "Sep 19 18:39:50.634 [\u001b[1m\u001b[34mINFO\u001b[0m] Make sure you follow the LLaMA's terms of use: https://bit.ly/llama2-license for LLaMA 2, https://bit.ly/llama-license for LLaMA 1\n",
+            "Sep 19 18:39:50.639 [\u001b[1m\u001b[34mINFO\u001b[0m] Using DHT prefix: StableBeluga2-hf\n",
+            "Sep 19 18:40:13.920 [\u001b[1m\u001b[34mINFO\u001b[0m] Route found: 0:40 via …HfQWVM => 40:80 via …Zj98Se\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "{\n",
+            "  \"object\": \"chat.completion\",\n",
+            "  \"choices\": [\n",
+            "    {\n",
+            "      \"finish_reason\": \"stop\",\n",
+            "      \"index\": 0,\n",
+            "      \"message\": {\n",
+            "        \"content\": \"Hello, how are you?\\nI'm doing well, thank you. I'm just getting ready to go to the gym.\\nOh, that's great. I'm trying to get back into a workout routine myself.\\nYeah,\",\n",
+            "        \"role\": \"assistant\",\n",
+            "        \"logprobs\": null\n",
+            "      }\n",
+            "    }\n",
+            "  ],\n",
+            "  \"id\": \"chatcmpl-f09d79b3-c1d1-49b7-b55f-cd8dfa1043bf\",\n",
+            "  \"created\": 1695148897.473613,\n",
+            "  \"model\": \"petals-team/StableBeluga2\",\n",
+            "  \"usage\": {\n",
+            "    \"prompt_tokens\": 6,\n",
+            "    \"completion_tokens\": 45,\n",
+            "    \"total_tokens\": 51\n",
+            "  }\n",
+            "}\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## huggyllama/llama-65b"
+      ],
+      "metadata": {
+        "id": "J8DubRnHvh_j"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "response = completion(model=\"petals/huggyllama/llama-65b\", messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}], temperature=0.2, max_tokens=10)\n",
+        "\n",
+        "print(response)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 538,
+          "referenced_widgets": [
+            "2fec5cc400424671a3d517327117d18a",
+            "3687c76fe84d464baaf35366b21e83b3",
+            "c29d4460dbaa441cae110b58e0014151",
+            "6560449a38bf4a7bacd97ccaacf01c4c",
+            "5fbd6ae281984d28ba59ebfd0279eda7",
+            "323e30e275434aeea241163e5f1f9031",
+            "48f4adec51c94f9da6e4c4564daeff84",
+            "2a672981a44b4a7fb30674f97f4c10c6",
+            "d75ae8d22ea74840b4c80c8f386384c4",
+            "54c06312ecff4e7588665e8b0cb7118b",
+            "300078a9d1a6483fba81a4be63793ff7"
+          ]
+        },
+        "id": "IlTCJwDsNvgF",
+        "outputId": "2e84d125-d982-48ed-8a92-6ca438a50d0c"
+      },
+      "execution_count": 4,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Sep 19 18:41:37.912 [\u001b[1m\u001b[34mINFO\u001b[0m] Make sure you follow the LLaMA's terms of use: https://bit.ly/llama2-license for LLaMA 2, https://bit.ly/llama-license for LLaMA 1\n",
+            "Sep 19 18:41:37.914 [\u001b[1m\u001b[34mINFO\u001b[0m] Using DHT prefix: llama-65b-hf\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "2fec5cc400424671a3d517327117d18a"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:362: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.2` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
+            "  warnings.warn(\n",
+            "Sep 19 18:41:48.396 [\u001b[1m\u001b[34mINFO\u001b[0m] Route found: 0:80 via …g634yJ\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "{\n",
+            "  \"object\": \"chat.completion\",\n",
+            "  \"choices\": [\n",
+            "    {\n",
+            "      \"finish_reason\": \"stop\",\n",
+            "      \"index\": 0,\n",
+            "      \"message\": {\n",
+            "        \"content\": \"Hello, how are you?\\nI'm fine, thank you. And\",\n",
+            "        \"role\": \"assistant\",\n",
+            "        \"logprobs\": null\n",
+            "      }\n",
+            "    }\n",
+            "  ],\n",
+            "  \"id\": \"chatcmpl-3496e6eb-2a27-4f94-8d75-70648eacd88f\",\n",
+            "  \"created\": 1695148912.9116046,\n",
+            "  \"model\": \"huggyllama/llama-65b\",\n",
+            "  \"usage\": {\n",
+            "    \"prompt_tokens\": 6,\n",
+            "    \"completion_tokens\": 14,\n",
+            "    \"total_tokens\": 20\n",
+            "  }\n",
+            "}\n"
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file