add instructions and code to support more providers in guide

Kai Wu 2025-01-27 13:58:54 -08:00
parent 3c1a2c3d66
commit 0e69d71eb9


@ -37,7 +37,7 @@
"id": "K4AvfUAJZOeS"
},
"source": [
"### 1.1. Create TogetherAI account\n",
"### 1.1. Get API Key from Cloud Provider. \n",
"\n",
"\n",
"In order to run inference for the llama models, you will need to use an inference provider. Llama stack supports a number of inference [providers](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/inference).\n",
@ -47,7 +47,7 @@
"\n",
"Steps [here](https://docs.google.com/document/d/1Vg998IjRW_uujAPnHdQ9jQWvtmkZFt74FldW2MblxPY/edit?usp=sharing).\n",
"\n",
"You can also use Fireworks.ai or even Ollama if you would like to.\n",
"You can also use Fireworks.ai, Bedrock, Cerebras, Nvidia, Sambanova or even Ollama if you would like to.\n",
"\n",
"\n",
"\n",
@ -79,6 +79,9 @@
},
"collapsed": true,
"id": "J2kGed0R5PSf",
"jupyter": {
"outputs_hidden": true
},
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
},
"outputs": [
@ -198,12 +201,12 @@
},
{
"cell_type": "markdown",
"id": "414301dc",
"id": "5b5c4486-4602-4f08-8914-e66770d62461",
"metadata": {
"id": "414301dc"
},
"source": [
"### 1.3. Configure Llama Stack for Together\n",
"### 1.3. Configure Llama Stack for the provider\n",
"\n",
"\n",
"Llama Stack is architected as a collection of lego blocks which can be assembled as needed.\n",
@ -226,6 +229,9 @@
},
"collapsed": true,
"id": "HaepEZXCDgif",
"jupyter": {
"outputs_hidden": true
},
"outputId": "9314f698-593d-4c1a-ea15-15c735dc1023"
},
"outputs": [
@ -552,9 +558,10 @@
],
"source": [
"# NBVAL_SKIP\n",
"\n",
"# Choose the provider from our list of supported provider ['bedrock','together','fireworks','cerebras','hf-endpoint','nvidia','sambanova']\n",
"PROVIDER = 'together'\n",
"# This will build all the dependencies you will need\n",
"!llama stack build --template together --image-type venv"
"!llama stack build --template PROVIDER --image-type venv"
]
},
{
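
For reference, a sketch of how the interpolated build command behaves if you pick a different provider (assuming the `{PROVIDER}` substitution used above):

PROVIDER = 'fireworks'
# IPython expands {PROVIDER}, so this runs: llama stack build --template fireworks --image-type venv
!llama stack build --template {PROVIDER} --image-type venv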
@ -566,7 +573,7 @@
"source": [
"### 1.4. Initialize Llama Stack\n",
"\n",
"Now that all dependencies have been installed, we can initialize llama stack. We will first set the `TOGETHER_API_KEY` environment variable\n"
"Now that all dependencies have been installed, we can initialize llama stack. We will first set our cloud api key as the `API_KEY` environment variable and the search api key as `TAVILY_SEARCH_API_KEY` environment variable\n"
]
},
{
@ -703,6 +710,9 @@
},
"collapsed": true,
"id": "E1UFuJC570Tk",
"jupyter": {
"outputs_hidden": true
},
"outputId": "aebb69d4-c167-4de5-eb8a-dd19dd538f63"
},
"outputs": [
@ -1111,15 +1121,23 @@
],
"source": [
"import os\n",
"def get_api_string(provider):\n",
" assert(provider in ['bedrock','together','fireworks','cerebras','hf-endpoint','nvidia','sambanova'])\n",
" if PROVIDER == \"hf-endpoint\":\n",
" return 'HF_API_TOKEN'\n",
" else:\n",
" return provider.upper()+ '_API_KEY'\n",
"\n",
"try:\n",
" from google.colab import userdata\n",
" os.environ['TOGETHER_API_KEY'] = userdata.get('TOGETHER_API_KEY')\n",
" os.environ[get_api_string(PROVIDER)] = userdata.get('API_KEY')\n",
" os.environ['TAVILY_SEARCH_API_KEY'] = userdata.get('TAVILY_SEARCH_API_KEY')\n",
"except ImportError:\n",
" print(\"Not in Google Colab environment\")\n",
" os.environ[get_api_string(PROVIDER)] = os.environ['API_KEY']\n",
"\n",
"for key in ['TOGETHER_API_KEY', 'TAVILY_SEARCH_API_KEY']:\n",
"\n",
"for key in [get_api_string(PROVIDER), 'TAVILY_SEARCH_API_KEY']:\n",
" try:\n",
" api_key = os.environ[key]\n",
" if not api_key:\n",
@ -1132,7 +1150,7 @@
" ) from None\n",
"\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"client = LlamaStackAsLibraryClient(\"together\", provider_data = {\"tavily_search_api_key\": os.environ['TAVILY_SEARCH_API_KEY']})\n",
"client = LlamaStackAsLibraryClient(PROVIDER, provider_data = {\"tavily_search_api_key\": os.environ['TAVILY_SEARCH_API_KEY']})\n",
"_ = client.initialize()"
]
},
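
As a quick sanity check after `client.initialize()` (a sketch, not part of this commit; attribute names follow the current llama-stack client API and may differ between versions):

# List the models the chosen provider exposes through the library client
for model in client.models.list():
    print(model.identifier)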
@ -1158,6 +1176,9 @@
},
"collapsed": true,
"id": "ruO9jQna_t_S",
"jupyter": {
"outputs_hidden": true
},
"outputId": "ab1722a7-62ab-43bb-9cab-4e45bf62068a"
},
"outputs": [
@ -2162,6 +2183,9 @@
},
"collapsed": true,
"id": "GvVRuhO-GOov",
"jupyter": {
"outputs_hidden": true
},
"outputId": "39395e26-bb7d-4616-d51d-036c8bf41427"
},
"outputs": [
@ -2391,7 +2415,8 @@
"Requirement already satisfied: ptyprocess~=0.7.0 in /usr/local/lib/python3.11/dist-packages (from colab-xterm) (0.7.0)\n",
"Requirement already satisfied: tornado>5.1 in /usr/local/lib/python3.11/dist-packages (from colab-xterm) (6.3.3)\n",
"Downloading colab_xterm-0.2.0-py3-none-any.whl (115 kB)\n",
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/115.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.6/115.6 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/115.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.6/115.6 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: colab-xterm\n",
"Successfully installed colab-xterm-0.2.0\n"
]
@ -2764,7 +2789,19 @@
},
{
"data": {
"application/javascript": "\n (async () => {\n const url = new URL(await google.colab.kernel.proxyPort(10000, {'cache': true}));\n const iframe = document.createElement('iframe');\n iframe.src = url;\n iframe.setAttribute('width', '100%');\n iframe.setAttribute('height', '800');\n iframe.setAttribute('frameborder', 0);\n document.body.appendChild(iframe);\n })();\n ",
"application/javascript": [
"\n",
" (async () => {\n",
" const url = new URL(await google.colab.kernel.proxyPort(10000, {'cache': true}));\n",
" const iframe = document.createElement('iframe');\n",
" iframe.src = url;\n",
" iframe.setAttribute('width', '100%');\n",
" iframe.setAttribute('height', '800');\n",
" iframe.setAttribute('frameborder', 0);\n",
" document.body.appendChild(iframe);\n",
" })();\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
@ -3523,7 +3560,7 @@
}
],
"source": [
"# NBVAL_SKIP \n",
"# NBVAL_SKIP\n",
"print(f\"Getting traces for session_id={session_id}\")\n",
"import json\n",
"\n",
@ -3830,7 +3867,8 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
@ -3843,7 +3881,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
"version": "3.10.16"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {