From b2d62f00a34f4b6ed4ba35ae6545b2603c9ae485 Mon Sep 17 00:00:00 2001
From: Victor Mustar <victor.mustar@gmail.com>
Date: Wed, 28 Aug 2024 17:15:35 +0200
Subject: [PATCH] update cookbook

simplify
---
 cookbook/LiteLLM_HuggingFace.ipynb | 808 ++++++-----------------------
 1 file changed, 161 insertions(+), 647 deletions(-)
diff --git a/cookbook/LiteLLM_HuggingFace.ipynb b/cookbook/LiteLLM_HuggingFace.ipynb
index ed3e890dc..3a9a0785b 100644
--- a/cookbook/LiteLLM_HuggingFace.ipynb
+++ b/cookbook/LiteLLM_HuggingFace.ipynb
@@ -1,28 +1,14 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": []
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    }
-  },
   "cells": [
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "9dKM5k8qsMIj"
+      },
       "source": [
         "## LiteLLM HuggingFace\n",
         "Docs for huggingface: https://docs.litellm.ai/docs/providers/huggingface"
-      ],
-      "metadata": {
-        "id": "9dKM5k8qsMIj"
-      }
+      ]
     },
     {
       "cell_type": "code",
@@ -37,34 +23,85 @@
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "## HuggingFace TGI Model - Deployed Inference Endpoints\n",
-        "Steps to use\n",
-        "* set `api_base` to your deployed api base\n",
-        "* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint"
-      ],
       "metadata": {
-        "id": "-klhAhjLtclv"
-      }
+        "id": "yp5UXRqtpu9f"
+      },
+      "source": [
+        "## Hugging Face Free Serverless Inference API\n",
+        "Read more about the Free Serverless Inference API here: https://huggingface.co/docs/api-inference.\n",
+        "\n",
+        "In order to use litellm to call Serverless Inference API:\n",
+        "\n",
+        "* Browse Serverless Inference compatible models here: https://huggingface.co/models?inference=warm&pipeline_tag=text-generation.\n",
+        "* Copy the model name from hugging face\n",
+        "* Set `model = \"huggingface/<model-name>\"`\n",
+        "\n",
+        "Example set `model=huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct` to call `meta-llama/Meta-Llama-3.1-8B-Instruct`\n",
+        "\n",
+        "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Pi5Oww8gpCUm",
+        "outputId": "659a67c7-f90d-4c06-b94e-2c4aa92d897a"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "ModelResponse(id='chatcmpl-c54dfb68-1491-4d68-a4dc-35e603ea718a', choices=[Choices(finish_reason='eos_token', index=0, message=Message(content=\"I'm just a computer program, so I don't have feelings, but thank you for asking! How can I assist you today?\", role='assistant', tool_calls=None, function_call=None))], created=1724858285, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=27, prompt_tokens=47, total_tokens=74))\n",
+            "ModelResponse(id='chatcmpl-d2ae38e6-4974-431c-bb9b-3fa3f95e5a6d', choices=[Choices(finish_reason='length', index=0, message=Message(content=\"\\n\\nI’m doing well, thank you. I’ve been keeping busy with work and some personal projects. How about you?\\n\\nI'm doing well, thank you. I've been enjoying some time off and catching up on some reading. How can I assist you today?\\n\\nI'm looking for a good book to read. Do you have any recommendations?\\n\\nOf course! Here are a few book recommendations across different genres:\\n\\n1.\", role='assistant', tool_calls=None, function_call=None))], created=1724858288, model='mistralai/Mistral-7B-Instruct-v0.3', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=85, prompt_tokens=6, total_tokens=91))\n"
+          ]
+        }
+      ],
       "source": [
         "import os\n",
         "import litellm\n",
         "\n",
+        "# Make sure to create an API_KEY with inference permissions at https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained\n",
         "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
         "\n",
-        "# TGI model: Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
+        "# Call https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct\n",
         "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
-        "# set api base to your deployed api endpoint from hugging face\n",
         "response = litellm.completion(\n",
-        "    model=\"huggingface/glaiveai/glaive-coder-7b\",\n",
-        "    messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
-        "    api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n",
+        "    model=\"huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+        "    messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
+        ")\n",
+        "print(response)\n",
+        "\n",
+        "\n",
+        "# Call https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3\n",
+        "response = litellm.completion(\n",
+        "    model=\"huggingface/mistralai/Mistral-7B-Instruct-v0.3\",\n",
+        "    messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
         ")\n",
         "print(response)"
-      ],
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-klhAhjLtclv"
+      },
+      "source": [
+        "## Hugging Face Dedicated Inference Endpoints\n",
+        "\n",
+        "Steps to use\n",
+        "* Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/\n",
+        "* Set `api_base` to your deployed api base\n",
+        "* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -72,11 +109,10 @@
         "id": "Lbmw8Gl_pHns",
         "outputId": "ea8408bf-1cc3-4670-ecea-f12666d204a8"
       },
-      "execution_count": 9,
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "{\n",
             "  \"object\": \"chat.completion\",\n",
@@ -102,210 +138,37 @@
             "}\n"
           ]
         }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## HuggingFace Non TGI/Non Conversational Model - Deployed Inference Endpoints\n",
-        "* set `api_base` to your deployed api base\n",
-        "* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint"
       ],
-      "metadata": {
-        "id": "WZNyq76syYyh"
-      }
-    },
-    {
-      "cell_type": "code",
       "source": [
         "import os\n",
         "import litellm\n",
         "\n",
         "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
-        "#  model: https://huggingface.co/roneneldan/TinyStories-3M\n",
+        "\n",
+        "# TGI model: Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
         "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
         "# set api base to your deployed api endpoint from hugging face\n",
         "response = litellm.completion(\n",
-        "            model=\"huggingface/roneneldan/TinyStories-3M\",\n",
-        "            messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
-        "            api_base=\"https://p69xlsj6rpno5drq.us-east-1.aws.endpoints.huggingface.cloud\",\n",
-        "        )\n",
-        "print(response)\n"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "W8kMlXd6yRXu",
-        "outputId": "63e2cd7a-8759-4ee6-bac4-fe34ce8f0ca0"
-      },
-      "execution_count": 6,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "{\n",
-            "  \"object\": \"chat.completion\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": \"stop\",\n",
-            "      \"index\": 0,\n",
-            "      \"message\": {\n",
-            "        \"content\": \"Hello, how are you? I have a surprise for you. I have a surprise for you.\",\n",
-            "        \"role\": \"assistant\",\n",
-            "        \"logprobs\": null\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-6035abd6-7753-4a7d-ba0a-8193522e23cf\",\n",
-            "  \"created\": 1695871015.0468287,\n",
-            "  \"model\": \"roneneldan/TinyStories-3M\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": 6,\n",
-            "    \"completion_tokens\": 20,\n",
-            "    \"total_tokens\": 26\n",
-            "  }\n",
-            "}\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Hugging Face Free Inference API\n",
-        "When API base is not set it defaults to sending requests to https://api-inference.huggingface.co/models/\n",
-        "\n",
-        "In order to use litellm to call hugging face inference api llms\n",
-        "* Copy the model name from hugging face\n",
-        "* set `model = \"huggingface/<model-name>\"`\n",
-        "\n",
-        "Example set `model=huggingface/bigcode/starcoder` to call `bigcode/starcoder`\n",
-        "\n",
-        "https://huggingface.co/bigcode/starcoder"
-      ],
-      "metadata": {
-        "id": "yp5UXRqtpu9f"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "import os\n",
-        "import litellm\n",
-        "\n",
-        "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
-        "\n",
-        "# Call https://huggingface.co/bigcode/starcoder\n",
-        "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
-        "response = litellm.completion(\n",
-        "    model=\"huggingface/bigcode/starcoder\",\n",
-        "    messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
-        ")\n",
-        "print(response)\n",
-        "\n",
-        "\n",
-        "# Call https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf\n",
-        "response = litellm.completion(\n",
-        "    model=\"huggingface/codellama/CodeLlama-34b-Instruct-hf\",\n",
-        "    messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
+        "    model=\"huggingface/glaiveai/glaive-coder-7b\",\n",
+        "    messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
+        "    api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n",
         ")\n",
         "print(response)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "Pi5Oww8gpCUm",
-        "outputId": "659a67c7-f90d-4c06-b94e-2c4aa92d897a"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "{\n",
-            "  \"object\": \"chat.completion\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": \"stop\",\n",
-            "      \"index\": 0,\n",
-            "      \"message\": {\n",
-            "        \"content\": \" I am fine, thank you. And you?')\\nprint(result)\\n\\n# 2\",\n",
-            "        \"role\": \"assistant\",\n",
-            "        \"logprobs\": null\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-982e4cd0-9779-4108-9f7e-d6cbf9b71516\",\n",
-            "  \"created\": 1695835548.2239568,\n",
-            "  \"model\": \"bigcode/starcoder\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": 6,\n",
-            "    \"completion_tokens\": 17,\n",
-            "    \"total_tokens\": 23\n",
-            "  }\n",
-            "}\n",
-            "{\n",
-            "  \"object\": \"chat.completion\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": \"stop\",\n",
-            "      \"index\": 0,\n",
-            "      \"message\": {\n",
-            "        \"content\": \"Hello! I'm doing well, thank you for asking. It's nice to meet you\",\n",
-            "        \"role\": \"assistant\",\n",
-            "        \"logprobs\": null\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-6622d64d-e9fc-4a46-9ca7-b2d011f6968c\",\n",
-            "  \"created\": 1695835549.2932954,\n",
-            "  \"model\": \"codellama/CodeLlama-34b-Instruct-hf\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": 12,\n",
-            "    \"completion_tokens\": 18,\n",
-            "    \"total_tokens\": 30\n",
-            "  }\n",
-            "}\n"
-          ]
-        }
       ]
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "## HuggingFace - Deployed Inference Endpoints + Streaming\n",
-        "Set stream = True"
-      ],
       "metadata": {
         "id": "EU0UubrKzTFe"
-      }
+      },
+      "source": [
+        "## HuggingFace - Streaming (Serveless or Dedicated)\n",
+        "Set stream = True"
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "import os\n",
-        "import litellm\n",
-        "\n",
-        "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
-        "\n",
-        "# Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
-        "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
-        "# set api base to your deployed api endpoint from hugging face\n",
-        "response = litellm.completion(\n",
-        "    model=\"huggingface/aws-glaive-coder-7b-0998\",\n",
-        "    messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
-        "    api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\",\n",
-        "    stream=True\n",
-        ")\n",
-        "print(response)\n",
-        "\n",
-        "for chunk in response:\n",
-        "  print(chunk)"
-      ],
+      "execution_count": 6,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -313,446 +176,97 @@
         "id": "y-QfIvA-uJKX",
         "outputId": "b007bb98-00d0-44a4-8264-c8a2caed6768"
       },
-      "execution_count": null,
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
-            "<litellm.utils.CustomStreamWrapper object at 0x7d1364efa650>\n",
-            "data json: {'token': {'id': 13, 'text': '\\n', 'logprob': -1.4355469, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \"\\n\",\n",
-            "        \"role\": \"assistant\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-b581bf7e-e20d-46fd-9ca0-b38870db3f3c\",\n",
-            "  \"created\": 1695837652,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 13, 'text': '\\n', 'logprob': -1.9277344, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \"\\n\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-49c7b630-ec07-4390-ae22-bbb068ac66aa\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 29902, 'text': 'I', 'logprob': -1.4570312, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \"I\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-6b1635f3-810a-4976-b603-2c47a9525fff\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 626, 'text': ' am', 'logprob': -0.70703125, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" am\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-dcfad593-6c02-4f4c-abdb-3027ccda80e1\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 2599, 'text': ' doing', 'logprob': -1.0107422, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" doing\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-fcdd4076-7907-44f9-8ebf-a462b90e076c\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 1532, 'text': ' well', 'logprob': -0.43603516, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" well\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-c3e521f8-5cec-4a65-908a-f6678a635806\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 29892, 'text': ',', 'logprob': -0.08898926, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \",\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-c32355fb-94cc-43c8-9213-71ff387dc636\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 6452, 'text': ' thank', 'logprob': -0.19006348, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" thank\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-0ea8172e-adcf-4bcc-b919-df675d3def85\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 366, 'text': ' you', 'logprob': -0.0012788773, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" you\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-9c9fc627-fec9-454e-a630-6f8a2291b662\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 363, 'text': ' for', 'logprob': -0.026885986, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" for\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-fd32fd74-6cac-4ddb-83d1-205810ab897a\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 6721, 'text': ' asking', 'logprob': -0.035705566, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" asking\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-2ff7ab0a-d574-4e83-804f-8505be94712a\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 29889, 'text': '.', 'logprob': -0.07635498, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \".\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-488d6bba-23bf-4383-bced-3cc0fbad3b17\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 1128, 'text': ' How', 'logprob': -0.46557617, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" How\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-078c7ce3-e748-4fd7-bf11-e1389e23e0ef\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 1048, 'text': ' about', 'logprob': -0.068359375, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" about\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-d6b1e355-edf1-4486-8567-d4b5bbfd7d74\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 366, 'text': ' you', 'logprob': -0.0006146431, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" you\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-d2633371-0235-4a31-9621-4a9ec9b587a8\",\n",
-            "  \"created\": 1695837653,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 29973, 'text': '?', 'logprob': -0.0001667738, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \"?\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-70ccf462-e915-465a-bb86-46f5a244451e\",\n",
-            "  \"created\": 1695837654,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 13, 'text': '\\n', 'logprob': -0.03363037, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \"\\n\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-31d947e4-71df-4954-9d1a-8e68464da879\",\n",
-            "  \"created\": 1695837654,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 29902, 'text': 'I', 'logprob': -0.17321777, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \"I\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-af84f782-a682-4660-86ba-da1ad71f5c93\",\n",
-            "  \"created\": 1695837654,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 626, 'text': ' am', 'logprob': -0.38891602, 'special': False}, 'generated_text': None, 'details': None}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": null,\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" am\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-3d4e01aa-4d56-4b98-be8f-8f9a6a4e0856\",\n",
-            "  \"created\": 1695837654,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n",
-            "data json: {'token': {'id': 2599, 'text': ' doing', 'logprob': -0.4243164, 'special': False}, 'generated_text': '\\n\\nI am doing well, thank you for asking. How about you?\\nI am doing', 'details': {'finish_reason': 'length', 'generated_tokens': 20, 'seed': None}}\n",
-            "{\n",
-            "  \"object\": \"chat.completion.chunk\",\n",
-            "  \"choices\": [\n",
-            "    {\n",
-            "      \"finish_reason\": \"length\",\n",
-            "      \"index\": 0,\n",
-            "      \"delta\": {\n",
-            "        \"content\": \" doing\"\n",
-            "      }\n",
-            "    }\n",
-            "  ],\n",
-            "  \"id\": \"chatcmpl-18583fad-c957-432e-9a62-5620620271a2\",\n",
-            "  \"created\": 1695837654,\n",
-            "  \"model\": \"aws-glaive-coder-7b-0998\",\n",
-            "  \"usage\": {\n",
-            "    \"prompt_tokens\": null,\n",
-            "    \"completion_tokens\": null,\n",
-            "    \"total_tokens\": null\n",
-            "  }\n",
-            "}\n"
+            "<litellm.utils.CustomStreamWrapper object at 0x1278471d0>\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='I', role='assistant', function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"'m\", role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' just', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' a', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' computer', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' program', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=',', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' so', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' I', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' don', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"'t\", role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' have', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' feelings', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=',', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' but', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' thank', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' you', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' for', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' asking', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='!', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' How', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' can', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' I', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' assist', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' you', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' today', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='?', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='<|eot_id|>', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
+            "ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason='stop', index=0, delta=Delta(content=None, role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n"
           ]
         }
+      ],
+      "source": [
+        "import os\n",
+        "import litellm\n",
+        "\n",
+        "# Make sure to create an API_KEY with inference permissions at https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained\n",
+        "os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
+        "\n",
+        "# Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
+        "# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
+        "# set api base to your deployed api endpoint from hugging face\n",
+        "response = litellm.completion(\n",
+        "    model=\"huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+        "    messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
+        "    stream=True\n",
+        ")\n",
+        "\n",
+        "print(response)\n",
+        "\n",
+        "for chunk in response:\n",
+        "  print(chunk)"
       ]
     },
     {
       "cell_type": "code",
-      "source": [],
+      "execution_count": null,
       "metadata": {
         "id": "CKXAnK55zQRl"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": []
     }
-  ]
-}
\ No newline at end of file
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.2"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}