update cookbook

ishaan-jaff 2023-09-27 20:19:38 -07:00
parent b1ab1191e6
commit 361bf02e53


@@ -35,6 +35,142 @@
"!pip install litellm"
]
},
{
"cell_type": "markdown",
"source": [
"## HuggingFace TGI Model - Deployed Inference Endpoints\n",
"Steps to use\n",
"* set `api_base` to your deployed api base\n",
"* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint"
],
"metadata": {
"id": "-klhAhjLtclv"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import litellm\n",
"\n",
"os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
"\n",
"# TGI model: Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
"# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
"# set api base to your deployed api endpoint from hugging face\n",
"response = litellm.completion(\n",
" model=\"huggingface/glaiveai/glaive-coder-7b\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n",
")\n",
"print(response)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Lbmw8Gl_pHns",
"outputId": "ea8408bf-1cc3-4670-ecea-f12666d204a8"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"length\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"\\n\\nI am doing well, thank you for asking. How about you?\\nI am doing\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": -8.9481967812\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-74dc9d89-3916-47ce-9bea-b80e66660f77\",\n",
" \"created\": 1695871068.8413374,\n",
" \"model\": \"glaiveai/glaive-coder-7b\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 24\n",
" }\n",
"}\n"
]
}
]
},
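The cell above returns one complete response. As a side note, the same deployed TGI endpoint can also be streamed. The snippet below is a minimal sketch (not part of this commit): it passes `stream=True` to `litellm.completion` and prints tokens as they arrive. The endpoint URL is a placeholder, and the dict-style chunk access mirrors the response format shown in the cell output above; chunk objects may differ slightly across litellm versions.

```python
import os
import litellm

os.environ["HUGGINGFACE_API_KEY"] = ""  # your HF token with access to the endpoint

# placeholder -- replace with your own deployed Inference Endpoint URL
api_base = "https://<your-endpoint-id>.us-east-1.aws.endpoints.huggingface.cloud"

# stream=True yields chunks as they are generated instead of one final response
response = litellm.completion(
    model="huggingface/glaiveai/glaive-coder-7b",  # 'huggingface/' prefix selects the provider
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    api_base=api_base,
    stream=True,
)

for chunk in response:
    # chunks follow the OpenAI streaming format; content may be absent on the final chunk
    piece = chunk["choices"][0]["delta"].get("content")
    if piece:
        print(piece, end="")
```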
{
"cell_type": "markdown",
"source": [
"## HuggingFace Non TGI/Non Conversational Model - Deployed Inference Endpoints\n",
"* set `api_base` to your deployed api base\n",
"* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint"
],
"metadata": {
"id": "WZNyq76syYyh"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import litellm\n",
"\n",
"os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
"# model: https://huggingface.co/roneneldan/TinyStories-3M\n",
"# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
"# set api base to your deployed api endpoint from hugging face\n",
"response = litellm.completion(\n",
" model=\"huggingface/roneneldan/TinyStories-3M\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" api_base=\"https://p69xlsj6rpno5drq.us-east-1.aws.endpoints.huggingface.cloud\",\n",
" )\n",
"print(response)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "W8kMlXd6yRXu",
"outputId": "63e2cd7a-8759-4ee6-bac4-fe34ce8f0ca0"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"stop\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"Hello, how are you? I have a surprise for you. I have a surprise for you.\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": null\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-6035abd6-7753-4a7d-ba0a-8193522e23cf\",\n",
" \"created\": 1695871015.0468287,\n",
" \"model\": \"roneneldan/TinyStories-3M\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 20,\n",
" \"total_tokens\": 26\n",
" }\n",
"}\n"
]
}
]
},
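For repeated calls it can help to wrap the endpoint in a small helper that returns only the generated text. The sketch below is illustrative only: `query_endpoint` is a hypothetical helper (not a litellm API), and it assumes the dict-style response shown in the cell output above.

```python
import os
import litellm

os.environ["HUGGINGFACE_API_KEY"] = ""  # your HF token

def query_endpoint(prompt: str, model: str, api_base: str, max_tokens: int = 50) -> str:
    """Call a deployed HuggingFace Inference Endpoint via litellm and return the text."""
    response = litellm.completion(
        model=model,  # must carry the 'huggingface/' prefix
        messages=[{"content": prompt, "role": "user"}],
        api_base=api_base,
        max_tokens=max_tokens,
    )
    # the response mirrors the OpenAI chat format shown in the cell output above
    return response["choices"][0]["message"]["content"]

# usage against the non-TGI endpoint from the cell above
print(query_endpoint(
    prompt="Hello, how are you?",
    model="huggingface/roneneldan/TinyStories-3M",
    api_base="https://p69xlsj6rpno5drq.us-east-1.aws.endpoints.huggingface.cloud",
))
```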
{
"cell_type": "markdown",
"source": [
@@ -84,7 +220,7 @@
"id": "Pi5Oww8gpCUm",
"outputId": "659a67c7-f90d-4c06-b94e-2c4aa92d897a"
},
"execution_count": 8, "execution_count": null,
"outputs": [ "outputs": [
{ {
"output_type": "stream", "output_type": "stream",
@ -138,75 +274,6 @@
} }
] ]
}, },
{
"cell_type": "markdown",
"source": [
"## HuggingFace - Deployed Inference Endpoints\n",
"Steps to use\n",
"* set `api_base` to your deployed api base\n",
"* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint"
],
"metadata": {
"id": "-klhAhjLtclv"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import litellm\n",
"\n",
"os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
"\n",
"# Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
"# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
"# set api base to your deployed api endpoint from hugging face\n",
"response = litellm.completion(\n",
" model=\"huggingface/aws-glaive-coder-7b-0998\",\n",
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
" api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n",
")\n",
"print(response)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Lbmw8Gl_pHns",
"outputId": "60b3067f-4569-4606-bb08-a186bfdbaecc"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"object\": \"chat.completion\",\n",
" \"choices\": [\n",
" {\n",
" \"finish_reason\": \"length\",\n",
" \"index\": 0,\n",
" \"message\": {\n",
" \"content\": \"\\n\\nI am doing well, thank you for asking. How about you?\\nI am doing\",\n",
" \"role\": \"assistant\",\n",
" \"logprobs\": -8.9481967812\n",
" }\n",
" }\n",
" ],\n",
" \"id\": \"chatcmpl-b4fb54a7-e4f3-476a-8e2b-3d1745f5c8a5\",\n",
" \"created\": 1695837218.8811502,\n",
" \"model\": \"aws-glaive-coder-7b-0998\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 6,\n",
" \"completion_tokens\": 18,\n",
" \"total_tokens\": 24\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
@@ -246,7 +313,7 @@
"id": "y-QfIvA-uJKX",
"outputId": "b007bb98-00d0-44a4-8264-c8a2caed6768"
},
"execution_count": 12, "execution_count": null,
"outputs": [ "outputs": [
{ {
"output_type": "stream", "output_type": "stream",