add instructions and code to support more providers in guide

Kai Wu 2025-01-27 13:58:54 -08:00
parent 3c1a2c3d66
commit 0e69d71eb9


@ -37,7 +37,7 @@
"id": "K4AvfUAJZOeS"
},
"source": [
"### 1.1. Create TogetherAI account\n",
"### 1.1. Get API Key from Cloud Provider. \n",
"\n",
"\n",
"In order to run inference for the llama models, you will need to use an inference provider. Llama stack supports a number of inference [providers](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/inference).\n",
@ -47,7 +47,7 @@
"\n",
"Steps [here](https://docs.google.com/document/d/1Vg998IjRW_uujAPnHdQ9jQWvtmkZFt74FldW2MblxPY/edit?usp=sharing).\n",
"\n",
"You can also use Fireworks.ai or even Ollama if you would like to.\n",
"You can also use Fireworks.ai, Bedrock, Cerebras, Nvidia, Sambanova or even Ollama if you would like to.\n",
"\n",
"\n",
"\n",
@ -79,6 +79,9 @@
},
"collapsed": true,
"id": "J2kGed0R5PSf",
"jupyter": {
"outputs_hidden": true
},
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
},
"outputs": [
@ -198,12 +201,12 @@
},
{
"cell_type": "markdown",
"id": "414301dc",
"id": "5b5c4486-4602-4f08-8914-e66770d62461",
"metadata": {
"id": "414301dc"
},
"source": [
"### 1.3. Configure Llama Stack for Together\n",
"### 1.3. Configure Llama Stack for the provider\n",
"\n",
"\n",
"Llama Stack is architected as a collection of lego blocks which can be assembled as needed.\n",
@ -226,6 +229,9 @@
},
"collapsed": true,
"id": "HaepEZXCDgif",
"jupyter": {
"outputs_hidden": true
},
"outputId": "9314f698-593d-4c1a-ea15-15c735dc1023"
},
"outputs": [
@ -552,9 +558,10 @@
],
"source": [
"# NBVAL_SKIP\n",
"\n",
"# Choose the provider from our list of supported provider ['bedrock','together','fireworks','cerebras','hf-endpoint','nvidia','sambanova']\n",
"PROVIDER = 'together'\n",
"# This will build all the dependencies you will need\n",
"!llama stack build --template together --image-type venv"
"!llama stack build --template PROVIDER --image-type venv"
]
},
{
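
For reference, a sketch of how the interpolated build command behaves if you pick a different provider (assuming the `{PROVIDER}` substitution used above):

PROVIDER = 'fireworks'
# IPython expands {PROVIDER}, so this runs: llama stack build --template fireworks --image-type venv
!llama stack build --template {PROVIDER} --image-type venv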
@ -566,7 +573,7 @@
"source": [
"### 1.4. Initialize Llama Stack\n",
"\n",
"Now that all dependencies have been installed, we can initialize llama stack. We will first set the `TOGETHER_API_KEY` environment variable\n"
"Now that all dependencies have been installed, we can initialize llama stack. We will first set our cloud api key as the `API_KEY` environment variable and the search api key as `TAVILY_SEARCH_API_KEY` environment variable\n"
]
},
{
@ -703,6 +710,9 @@
},
"collapsed": true,
"id": "E1UFuJC570Tk",
"jupyter": {
"outputs_hidden": true
},
"outputId": "aebb69d4-c167-4de5-eb8a-dd19dd538f63"
},
"outputs": [
@ -1111,15 +1121,23 @@
],
"source": [
"import os\n",
"def get_api_string(provider):\n",
" assert(provider in ['bedrock','together','fireworks','cerebras','hf-endpoint','nvidia','sambanova'])\n",
" if PROVIDER == \"hf-endpoint\":\n",
" return 'HF_API_TOKEN'\n",
" else:\n",
" return provider.upper()+ '_API_KEY'\n",
"\n",
"try:\n",
" from google.colab import userdata\n",
" os.environ['TOGETHER_API_KEY'] = userdata.get('TOGETHER_API_KEY')\n",
" os.environ[get_api_string(PROVIDER)] = userdata.get('API_KEY')\n",
" os.environ['TAVILY_SEARCH_API_KEY'] = userdata.get('TAVILY_SEARCH_API_KEY')\n",
"except ImportError:\n",
" print(\"Not in Google Colab environment\")\n",
" os.environ[get_api_string(PROVIDER)] = os.environ['API_KEY']\n",
"\n",
"for key in ['TOGETHER_API_KEY', 'TAVILY_SEARCH_API_KEY']:\n",
"\n",
"for key in [get_api_string(PROVIDER), 'TAVILY_SEARCH_API_KEY']:\n",
" try:\n",
" api_key = os.environ[key]\n",
" if not api_key:\n",
@ -1132,7 +1150,7 @@
" ) from None\n",
"\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"client = LlamaStackAsLibraryClient(\"together\", provider_data = {\"tavily_search_api_key\": os.environ['TAVILY_SEARCH_API_KEY']})\n",
"client = LlamaStackAsLibraryClient(PROVIDER, provider_data = {\"tavily_search_api_key\": os.environ['TAVILY_SEARCH_API_KEY']})\n",
"_ = client.initialize()"
]
},
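
As a quick sanity check after `client.initialize()` (a sketch, not part of this commit; attribute names follow the current llama-stack client API and may differ between versions):

# List the models the chosen provider exposes through the library client
for model in client.models.list():
    print(model.identifier)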
@ -1158,6 +1176,9 @@
},
"collapsed": true,
"id": "ruO9jQna_t_S",
"jupyter": {
"outputs_hidden": true
},
"outputId": "ab1722a7-62ab-43bb-9cab-4e45bf62068a"
},
"outputs": [
@ -2162,6 +2183,9 @@
},
"collapsed": true,
"id": "GvVRuhO-GOov",
"jupyter": {
"outputs_hidden": true
},
"outputId": "39395e26-bb7d-4616-d51d-036c8bf41427"
},
"outputs": [
@ -2391,7 +2415,8 @@
"Requirement already satisfied: ptyprocess~=0.7.0 in /usr/local/lib/python3.11/dist-packages (from colab-xterm) (0.7.0)\n",
"Requirement already satisfied: tornado>5.1 in /usr/local/lib/python3.11/dist-packages (from colab-xterm) (6.3.3)\n",
"Downloading colab_xterm-0.2.0-py3-none-any.whl (115 kB)\n",
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/115.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.6/115.6 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/115.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.6/115.6 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: colab-xterm\n",
"Successfully installed colab-xterm-0.2.0\n"
]
@ -2764,7 +2789,19 @@
},
{
"data": {
"application/javascript": "\n (async () => {\n const url = new URL(await google.colab.kernel.proxyPort(10000, {'cache': true}));\n const iframe = document.createElement('iframe');\n iframe.src = url;\n iframe.setAttribute('width', '100%');\n iframe.setAttribute('height', '800');\n iframe.setAttribute('frameborder', 0);\n document.body.appendChild(iframe);\n })();\n ",
"application/javascript": [
"\n",
" (async () => {\n",
" const url = new URL(await google.colab.kernel.proxyPort(10000, {'cache': true}));\n",
" const iframe = document.createElement('iframe');\n",
" iframe.src = url;\n",
" iframe.setAttribute('width', '100%');\n",
" iframe.setAttribute('height', '800');\n",
" iframe.setAttribute('frameborder', 0);\n",
" document.body.appendChild(iframe);\n",
" })();\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
@ -3523,7 +3560,7 @@
}
],
"source": [
"# NBVAL_SKIP \n",
"# NBVAL_SKIP\n",
"print(f\"Getting traces for session_id={session_id}\")\n",
"import json\n",
"\n",
@ -3830,7 +3867,8 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
@ -3843,7 +3881,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
"version": "3.10.16"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {