{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "2fec5cc400424671a3d517327117d18a": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_3687c76fe84d464baaf35366b21e83b3", "IPY_MODEL_c29d4460dbaa441cae110b58e0014151", "IPY_MODEL_6560449a38bf4a7bacd97ccaacf01c4c" ], "layout": "IPY_MODEL_5fbd6ae281984d28ba59ebfd0279eda7" } }, "3687c76fe84d464baaf35366b21e83b3": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_323e30e275434aeea241163e5f1f9031", "placeholder": "​", "style": "IPY_MODEL_48f4adec51c94f9da6e4c4564daeff84", "value": "Loading checkpoint shards: 100%" } }, "c29d4460dbaa441cae110b58e0014151": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2a672981a44b4a7fb30674f97f4c10c6", "max": 2, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d75ae8d22ea74840b4c80c8f386384c4", "value": 2 } }, "6560449a38bf4a7bacd97ccaacf01c4c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_54c06312ecff4e7588665e8b0cb7118b", "placeholder": "​", "style": "IPY_MODEL_300078a9d1a6483fba81a4be63793ff7", "value": " 2/2 [00:00<00:00, 2.36it/s]" } }, "5fbd6ae281984d28ba59ebfd0279eda7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "323e30e275434aeea241163e5f1f9031": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "48f4adec51c94f9da6e4c4564daeff84": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2a672981a44b4a7fb30674f97f4c10c6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d75ae8d22ea74840b4c80c8f386384c4": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "54c06312ecff4e7588665e8b0cb7118b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "300078a9d1a6483fba81a4be63793ff7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "# Using LiteLLM with Petals" ], "metadata": { "id": "dwGtLi_tvM6N" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "bdlgaWQqDpzj" }, "outputs": [], "source": [ "!pip install litellm # 0.1.702 and upwards" ] }, { "cell_type": "code", "source": [ "# install petals\n", "!pip install git+https://github.com/bigscience-workshop/petals" ], "metadata": { "id": "5Id2QKwOEH8X" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## petals-team/StableBeluga2" ], "metadata": { "id": "k42fldw3veSN" } }, { "cell_type": "code", "source": [ "from litellm import completion\n", "\n", "response = completion(model=\"petals/petals-team/StableBeluga2\", messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}], max_tokens=50)\n", "\n", "print(response)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "tIHcEHdSDqju", "outputId": "485dbf54-395c-433a-bbf4-8eb70a9fa624" }, "execution_count": 3, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "You are using the default legacy behaviour of the . If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n", "Sep 19 18:39:50.634 [\u001b[1m\u001b[34mINFO\u001b[0m] Make sure you follow the LLaMA's terms of use: https://bit.ly/llama2-license for LLaMA 2, https://bit.ly/llama-license for LLaMA 1\n", "Sep 19 18:39:50.639 [\u001b[1m\u001b[34mINFO\u001b[0m] Using DHT prefix: StableBeluga2-hf\n", "Sep 19 18:40:13.920 [\u001b[1m\u001b[34mINFO\u001b[0m] Route found: 0:40 via …HfQWVM => 40:80 via …Zj98Se\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"object\": \"chat.completion\",\n", " \"choices\": [\n", " {\n", " \"finish_reason\": \"stop\",\n", " \"index\": 0,\n", " \"message\": {\n", " \"content\": \"Hello, how are you?\\nI'm doing well, thank you. I'm just getting ready to go to the gym.\\nOh, that's great. I'm trying to get back into a workout routine myself.\\nYeah,\",\n", " \"role\": \"assistant\",\n", " \"logprobs\": null\n", " }\n", " }\n", " ],\n", " \"id\": \"chatcmpl-f09d79b3-c1d1-49b7-b55f-cd8dfa1043bf\",\n", " \"created\": 1695148897.473613,\n", " \"model\": \"petals-team/StableBeluga2\",\n", " \"usage\": {\n", " \"prompt_tokens\": 6,\n", " \"completion_tokens\": 45,\n", " \"total_tokens\": 51\n", " }\n", "}\n" ] } ] }, { "cell_type": "markdown", "source": [ "## huggyllama/llama-65b" ], "metadata": { "id": "J8DubRnHvh_j" } }, { "cell_type": "code", "source": [ "response = completion(model=\"petals/huggyllama/llama-65b\", messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}], temperature=0.2, max_tokens=10)\n", "\n", "print(response)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 538, "referenced_widgets": [ "2fec5cc400424671a3d517327117d18a", "3687c76fe84d464baaf35366b21e83b3", "c29d4460dbaa441cae110b58e0014151", "6560449a38bf4a7bacd97ccaacf01c4c", "5fbd6ae281984d28ba59ebfd0279eda7", "323e30e275434aeea241163e5f1f9031", "48f4adec51c94f9da6e4c4564daeff84", "2a672981a44b4a7fb30674f97f4c10c6", "d75ae8d22ea74840b4c80c8f386384c4", "54c06312ecff4e7588665e8b0cb7118b", "300078a9d1a6483fba81a4be63793ff7" ] }, "id": "IlTCJwDsNvgF", "outputId": "2e84d125-d982-48ed-8a92-6ca438a50d0c" }, "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Sep 19 18:41:37.912 [\u001b[1m\u001b[34mINFO\u001b[0m] Make sure you follow the LLaMA's terms of use: https://bit.ly/llama2-license for LLaMA 2, https://bit.ly/llama-license for LLaMA 1\n", "Sep 19 18:41:37.914 [\u001b[1m\u001b[34mINFO\u001b[0m] Using DHT prefix: llama-65b-hf\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00