From 361bf02e53f04d66ab3c39a9118d741ff528f9b7 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 27 Sep 2023 20:19:38 -0700 Subject: [PATCH] update cookbook --- cookbook/LiteLLM_HuggingFace.ipynb | 209 +++++++++++++++++++---------- 1 file changed, 138 insertions(+), 71 deletions(-) diff --git a/cookbook/LiteLLM_HuggingFace.ipynb b/cookbook/LiteLLM_HuggingFace.ipynb index 04ab14179..ed3e890dc 100644 --- a/cookbook/LiteLLM_HuggingFace.ipynb +++ b/cookbook/LiteLLM_HuggingFace.ipynb @@ -35,6 +35,142 @@ "!pip install litellm" ] }, + { + "cell_type": "markdown", + "source": [ + "## HuggingFace TGI Model - Deployed Inference Endpoints\n", + "Steps to use\n", + "* set `api_base` to your deployed api base\n", + "* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint" + ], + "metadata": { + "id": "-klhAhjLtclv" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import litellm\n", + "\n", + "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n", + "\n", + "# TGI model: Call https://huggingface.co/glaiveai/glaive-coder-7b\n", + "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n", + "# set api base to your deployed api endpoint from hugging face\n", + "response = litellm.completion(\n", + " model=\"huggingface/glaiveai/glaive-coder-7b\",\n", + " messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n", + " api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n", + ")\n", + "print(response)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Lbmw8Gl_pHns", + "outputId": "ea8408bf-1cc3-4670-ecea-f12666d204a8" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{\n", + " \"object\": \"chat.completion\",\n", + " \"choices\": [\n", + " {\n", + " \"finish_reason\": \"length\",\n", + " \"index\": 0,\n", + " \"message\": {\n", + " \"content\": \"\\n\\nI am doing well, thank you for asking. How about you?\\nI am doing\",\n", + " \"role\": \"assistant\",\n", + " \"logprobs\": -8.9481967812\n", + " }\n", + " }\n", + " ],\n", + " \"id\": \"chatcmpl-74dc9d89-3916-47ce-9bea-b80e66660f77\",\n", + " \"created\": 1695871068.8413374,\n", + " \"model\": \"glaiveai/glaive-coder-7b\",\n", + " \"usage\": {\n", + " \"prompt_tokens\": 6,\n", + " \"completion_tokens\": 18,\n", + " \"total_tokens\": 24\n", + " }\n", + "}\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## HuggingFace Non TGI/Non Conversational Model - Deployed Inference Endpoints\n", + "* set `api_base` to your deployed api base\n", + "* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint" + ], + "metadata": { + "id": "WZNyq76syYyh" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import litellm\n", + "\n", + "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n", + "# model: https://huggingface.co/roneneldan/TinyStories-3M\n", + "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n", + "# set api base to your deployed api endpoint from hugging face\n", + "response = litellm.completion(\n", + " model=\"huggingface/roneneldan/TinyStories-3M\",\n", + " messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n", + " api_base=\"https://p69xlsj6rpno5drq.us-east-1.aws.endpoints.huggingface.cloud\",\n", + " )\n", + "print(response)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "W8kMlXd6yRXu", + "outputId": "63e2cd7a-8759-4ee6-bac4-fe34ce8f0ca0" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{\n", + " \"object\": \"chat.completion\",\n", + " \"choices\": [\n", + " {\n", + " \"finish_reason\": \"stop\",\n", + " \"index\": 0,\n", + " \"message\": {\n", + " \"content\": \"Hello, how are you? I have a surprise for you. I have a surprise for you.\",\n", + " \"role\": \"assistant\",\n", + " \"logprobs\": null\n", + " }\n", + " }\n", + " ],\n", + " \"id\": \"chatcmpl-6035abd6-7753-4a7d-ba0a-8193522e23cf\",\n", + " \"created\": 1695871015.0468287,\n", + " \"model\": \"roneneldan/TinyStories-3M\",\n", + " \"usage\": {\n", + " \"prompt_tokens\": 6,\n", + " \"completion_tokens\": 20,\n", + " \"total_tokens\": 26\n", + " }\n", + "}\n" + ] + } + ] + }, { "cell_type": "markdown", "source": [ @@ -84,7 +220,7 @@ "id": "Pi5Oww8gpCUm", "outputId": "659a67c7-f90d-4c06-b94e-2c4aa92d897a" }, - "execution_count": 8, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -138,75 +274,6 @@ } ] }, - { - "cell_type": "markdown", - "source": [ - "## HuggingFace - Deployed Inference Endpoints\n", - "Steps to use\n", - "* set `api_base` to your deployed api base\n", - "* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint" - ], - "metadata": { - "id": "-klhAhjLtclv" - } - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "import litellm\n", - "\n", - "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n", - "\n", - "# Call https://huggingface.co/glaiveai/glaive-coder-7b\n", - "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n", - "# set api base to your deployed api endpoint from hugging face\n", - "response = litellm.completion(\n", - " model=\"huggingface/aws-glaive-coder-7b-0998\",\n", - " messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n", - " api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n", - ")\n", - "print(response)\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Lbmw8Gl_pHns", - "outputId": "60b3067f-4569-4606-bb08-a186bfdbaecc" - }, - "execution_count": 11, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{\n", - " \"object\": \"chat.completion\",\n", - " \"choices\": [\n", - " {\n", - " \"finish_reason\": \"length\",\n", - " \"index\": 0,\n", - " \"message\": {\n", - " \"content\": \"\\n\\nI am doing well, thank you for asking. How about you?\\nI am doing\",\n", - " \"role\": \"assistant\",\n", - " \"logprobs\": -8.9481967812\n", - " }\n", - " }\n", - " ],\n", - " \"id\": \"chatcmpl-b4fb54a7-e4f3-476a-8e2b-3d1745f5c8a5\",\n", - " \"created\": 1695837218.8811502,\n", - " \"model\": \"aws-glaive-coder-7b-0998\",\n", - " \"usage\": {\n", - " \"prompt_tokens\": 6,\n", - " \"completion_tokens\": 18,\n", - " \"total_tokens\": 24\n", - " }\n", - "}\n" - ] - } - ] - }, { "cell_type": "markdown", "source": [ @@ -246,7 +313,7 @@ "id": "y-QfIvA-uJKX", "outputId": "b007bb98-00d0-44a4-8264-c8a2caed6768" }, - "execution_count": 12, + "execution_count": null, "outputs": [ { "output_type": "stream",