From 361bf02e53f04d66ab3c39a9118d741ff528f9b7 Mon Sep 17 00:00:00 2001
From: ishaan-jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 27 Sep 2023 20:19:38 -0700
Subject: [PATCH] update cookbook

---
 cookbook/LiteLLM_HuggingFace.ipynb | 209 +++++++++++++++++++----------
 1 file changed, 138 insertions(+), 71 deletions(-)

diff --git a/cookbook/LiteLLM_HuggingFace.ipynb b/cookbook/LiteLLM_HuggingFace.ipynb
index 04ab14179..ed3e890dc 100644
--- a/cookbook/LiteLLM_HuggingFace.ipynb
+++ b/cookbook/LiteLLM_HuggingFace.ipynb
@@ -35,6 +35,142 @@
         "!pip install litellm"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## HuggingFace TGI Model - Deployed Inference Endpoints\n",
+        "Steps to use:\n",
+        "* Set `api_base` to the URL of your deployed Inference Endpoint\n",
+        "* Add the `huggingface/` prefix to your model name so LiteLLM knows it's a Hugging Face deployed Inference Endpoint"
+      ],
+      "metadata": {
+        "id": "-klhAhjLtclv"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "import litellm\n",
+        "\n",
+        "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
+        "\n",
+        "# TGI model: Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
+        "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
+        "# set api base to your deployed api endpoint from hugging face\n",
+        "response = litellm.completion(\n",
+        "    model=\"huggingface/glaiveai/glaive-coder-7b\",\n",
+        "    messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
+        "    api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n",
+        ")\n",
+        "print(response)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Lbmw8Gl_pHns",
+        "outputId": "ea8408bf-1cc3-4670-ecea-f12666d204a8"
+      },
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "{\n",
+            "  \"object\": \"chat.completion\",\n",
+            "  \"choices\": [\n",
+            "    {\n",
+            "      \"finish_reason\": \"length\",\n",
+            "      \"index\": 0,\n",
+            "      \"message\": {\n",
+            "        \"content\": \"\\n\\nI am doing well, thank you for asking. How about you?\\nI am doing\",\n",
+            "        \"role\": \"assistant\",\n",
+            "        \"logprobs\": -8.9481967812\n",
+            "      }\n",
+            "    }\n",
+            "  ],\n",
+            "  \"id\": \"chatcmpl-74dc9d89-3916-47ce-9bea-b80e66660f77\",\n",
+            "  \"created\": 1695871068.8413374,\n",
+            "  \"model\": \"glaiveai/glaive-coder-7b\",\n",
+            "  \"usage\": {\n",
+            "    \"prompt_tokens\": 6,\n",
+            "    \"completion_tokens\": 18,\n",
+            "    \"total_tokens\": 24\n",
+            "  }\n",
+            "}\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## HuggingFace Non-TGI / Non-Conversational Model - Deployed Inference Endpoints\n",
+        "* Set `api_base` to the URL of your deployed Inference Endpoint\n",
+        "* Add the `huggingface/` prefix to your model name so LiteLLM knows it's a Hugging Face deployed Inference Endpoint"
+      ],
+      "metadata": {
+        "id": "WZNyq76syYyh"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "import litellm\n",
+        "\n",
+        "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
+        "# Non-TGI model: Call https://huggingface.co/roneneldan/TinyStories-3M\n",
+        "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
+        "# set api base to your deployed api endpoint from hugging face\n",
+        "response = litellm.completion(\n",
+        "            model=\"huggingface/roneneldan/TinyStories-3M\",\n",
+        "            messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
+        "            api_base=\"https://p69xlsj6rpno5drq.us-east-1.aws.endpoints.huggingface.cloud\",\n",
+        "        )\n",
+        "print(response)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "W8kMlXd6yRXu",
+        "outputId": "63e2cd7a-8759-4ee6-bac4-fe34ce8f0ca0"
+      },
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "{\n",
+            "  \"object\": \"chat.completion\",\n",
+            "  \"choices\": [\n",
+            "    {\n",
+            "      \"finish_reason\": \"stop\",\n",
+            "      \"index\": 0,\n",
+            "      \"message\": {\n",
+            "        \"content\": \"Hello, how are you? I have a surprise for you. I have a surprise for you.\",\n",
+            "        \"role\": \"assistant\",\n",
+            "        \"logprobs\": null\n",
+            "      }\n",
+            "    }\n",
+            "  ],\n",
+            "  \"id\": \"chatcmpl-6035abd6-7753-4a7d-ba0a-8193522e23cf\",\n",
+            "  \"created\": 1695871015.0468287,\n",
+            "  \"model\": \"roneneldan/TinyStories-3M\",\n",
+            "  \"usage\": {\n",
+            "    \"prompt_tokens\": 6,\n",
+            "    \"completion_tokens\": 20,\n",
+            "    \"total_tokens\": 26\n",
+            "  }\n",
+            "}\n"
+          ]
+        }
+      ]
+    },
     {
       "cell_type": "markdown",
       "source": [
@@ -84,7 +220,7 @@
         "id": "Pi5Oww8gpCUm",
         "outputId": "659a67c7-f90d-4c06-b94e-2c4aa92d897a"
       },
-      "execution_count": 8,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -138,75 +274,6 @@
         }
       ]
     },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## HuggingFace - Deployed Inference Endpoints\n",
-        "Steps to use\n",
-        "* set `api_base` to your deployed api base\n",
-        "* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint"
-      ],
-      "metadata": {
-        "id": "-klhAhjLtclv"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "import os\n",
-        "import litellm\n",
-        "\n",
-        "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
-        "\n",
-        "# Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
-        "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
-        "# set api base to your deployed api endpoint from hugging face\n",
-        "response = litellm.completion(\n",
-        "    model=\"huggingface/aws-glaive-coder-7b-0998\",\n",
-        "    messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
-        "    api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n",
-        ")\n",
-        "print(response)\n"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "Lbmw8Gl_pHns",
-        "outputId": "60b3067f-4569-4606-bb08-a186bfdbaecc"
-      },
-      "execution_count": 11,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "{\n",
-            "  \"object\": \"chat.completion\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": \"length\",\n",
-            "      \"index\": 0,\n",
-            "      \"message\": {\n",
-            "        \"content\": \"\\n\\nI am doing well, thank you for asking. How about you?\\nI am doing\",\n",
-            "        \"role\": \"assistant\",\n",
-            "        \"logprobs\": -8.9481967812\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-b4fb54a7-e4f3-476a-8e2b-3d1745f5c8a5\",\n",
-            "  \"created\": 1695837218.8811502,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": 6,\n",
-            "    \"completion_tokens\": 18,\n",
-            "    \"total_tokens\": 24\n",
-            "  }\n",
-            "}\n"
-          ]
-        }
-      ]
-    },
     {
       "cell_type": "markdown",
       "source": [
@@ -246,7 +313,7 @@
         "id": "y-QfIvA-uJKX",
         "outputId": "b007bb98-00d0-44a4-8264-c8a2caed6768"
       },
-      "execution_count": 12,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",