mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-07 02:58:21 +00:00
add instructions and code to support more providers in guide
This commit is contained in:
parent
3c1a2c3d66
commit
0e69d71eb9
1 changed files with 9362 additions and 9324 deletions
|
@ -37,7 +37,7 @@
|
|||
"id": "K4AvfUAJZOeS"
|
||||
},
|
||||
"source": [
|
||||
"### 1.1. Create TogetherAI account\n",
|
||||
"### 1.1. Get an API key from a cloud provider\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"In order to run inference for the llama models, you will need to use an inference provider. Llama stack supports a number of inference [providers](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/inference).\n",
|
||||
|
@ -47,7 +47,7 @@
|
|||
"\n",
|
||||
"Steps [here](https://docs.google.com/document/d/1Vg998IjRW_uujAPnHdQ9jQWvtmkZFt74FldW2MblxPY/edit?usp=sharing).\n",
|
||||
"\n",
|
||||
"You can also use Fireworks.ai or even Ollama if you would like to.\n",
|
||||
"You can also use Fireworks.ai, Bedrock, Cerebras, NVIDIA, SambaNova, or even Ollama if you would like to.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
@ -79,6 +79,9 @@
|
|||
},
|
||||
"collapsed": true,
|
||||
"id": "J2kGed0R5PSf",
|
||||
"jupyter": {
|
||||
"outputs_hidden": true
|
||||
},
|
||||
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
||||
},
|
||||
"outputs": [
|
||||
|
@ -198,12 +201,12 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "414301dc",
|
||||
"id": "5b5c4486-4602-4f08-8914-e66770d62461",
|
||||
"metadata": {
|
||||
"id": "414301dc"
|
||||
},
|
||||
"source": [
|
||||
"### 1.3. Configure Llama Stack for Together\n",
|
||||
"### 1.3. Configure Llama Stack for the provider\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Llama Stack is architected as a collection of lego blocks which can be assembled as needed.\n",
|
||||
|
@ -226,6 +229,9 @@
|
|||
},
|
||||
"collapsed": true,
|
||||
"id": "HaepEZXCDgif",
|
||||
"jupyter": {
|
||||
"outputs_hidden": true
|
||||
},
|
||||
"outputId": "9314f698-593d-4c1a-ea15-15c735dc1023"
|
||||
},
|
||||
"outputs": [
|
||||
|
@ -552,9 +558,10 @@
|
|||
],
|
||||
"source": [
|
||||
"# NBVAL_SKIP\n",
|
||||
"\n",
|
||||
"# Choose the provider from our list of supported providers: ['bedrock','together','fireworks','cerebras','hf-endpoint','nvidia','sambanova']\n",
|
||||
"PROVIDER = 'together'\n",
|
||||
"# This will build all the dependencies you will need\n",
|
||||
"!llama stack build --template together --image-type venv"
|
||||
"!llama stack build --template PROVIDER --image-type venv"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -566,7 +573,7 @@
|
|||
"source": [
|
||||
"### 1.4. Initialize Llama Stack\n",
|
||||
"\n",
|
||||
"Now that all dependencies have been installed, we can initialize llama stack. We will first set the `TOGETHER_API_KEY` environment variable\n"
|
||||
"Now that all dependencies have been installed, we can initialize llama stack. We will first set our cloud provider API key as the `API_KEY` environment variable and the search API key as the `TAVILY_SEARCH_API_KEY` environment variable.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -703,6 +710,9 @@
|
|||
},
|
||||
"collapsed": true,
|
||||
"id": "E1UFuJC570Tk",
|
||||
"jupyter": {
|
||||
"outputs_hidden": true
|
||||
},
|
||||
"outputId": "aebb69d4-c167-4de5-eb8a-dd19dd538f63"
|
||||
},
|
||||
"outputs": [
|
||||
|
@ -1111,15 +1121,23 @@
|
|||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"def get_api_string(provider):\n",
|
||||
" assert(provider in ['bedrock','together','fireworks','cerebras','hf-endpoint','nvidia','sambanova'])\n",
|
||||
" if PROVIDER == \"hf-endpoint\":\n",
|
||||
" return 'HF_API_TOKEN'\n",
|
||||
" else:\n",
|
||||
" return provider.upper()+ '_API_KEY'\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" from google.colab import userdata\n",
|
||||
" os.environ['TOGETHER_API_KEY'] = userdata.get('TOGETHER_API_KEY')\n",
|
||||
" os.environ[get_api_string(PROVIDER)] = userdata.get('API_KEY')\n",
|
||||
" os.environ['TAVILY_SEARCH_API_KEY'] = userdata.get('TAVILY_SEARCH_API_KEY')\n",
|
||||
"except ImportError:\n",
|
||||
" print(\"Not in Google Colab environment\")\n",
|
||||
" os.environ[get_api_string(PROVIDER)] = os.environ['API_KEY']\n",
|
||||
"\n",
|
||||
"for key in ['TOGETHER_API_KEY', 'TAVILY_SEARCH_API_KEY']:\n",
|
||||
"\n",
|
||||
"for key in [get_api_string(PROVIDER), 'TAVILY_SEARCH_API_KEY']:\n",
|
||||
" try:\n",
|
||||
" api_key = os.environ[key]\n",
|
||||
" if not api_key:\n",
|
||||
|
@ -1132,7 +1150,7 @@
|
|||
" ) from None\n",
|
||||
"\n",
|
||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
||||
"client = LlamaStackAsLibraryClient(\"together\", provider_data = {\"tavily_search_api_key\": os.environ['TAVILY_SEARCH_API_KEY']})\n",
|
||||
"client = LlamaStackAsLibraryClient(PROVIDER, provider_data = {\"tavily_search_api_key\": os.environ['TAVILY_SEARCH_API_KEY']})\n",
|
||||
"_ = client.initialize()"
|
||||
]
|
||||
},
|
||||
|
@ -1158,6 +1176,9 @@
|
|||
},
|
||||
"collapsed": true,
|
||||
"id": "ruO9jQna_t_S",
|
||||
"jupyter": {
|
||||
"outputs_hidden": true
|
||||
},
|
||||
"outputId": "ab1722a7-62ab-43bb-9cab-4e45bf62068a"
|
||||
},
|
||||
"outputs": [
|
||||
|
@ -2162,6 +2183,9 @@
|
|||
},
|
||||
"collapsed": true,
|
||||
"id": "GvVRuhO-GOov",
|
||||
"jupyter": {
|
||||
"outputs_hidden": true
|
||||
},
|
||||
"outputId": "39395e26-bb7d-4616-d51d-036c8bf41427"
|
||||
},
|
||||
"outputs": [
|
||||
|
@ -2391,7 +2415,8 @@
|
|||
"Requirement already satisfied: ptyprocess~=0.7.0 in /usr/local/lib/python3.11/dist-packages (from colab-xterm) (0.7.0)\n",
|
||||
"Requirement already satisfied: tornado>5.1 in /usr/local/lib/python3.11/dist-packages (from colab-xterm) (6.3.3)\n",
|
||||
"Downloading colab_xterm-0.2.0-py3-none-any.whl (115 kB)\n",
|
||||
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/115.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.6/115.6 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/115.6 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.6/115.6 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hInstalling collected packages: colab-xterm\n",
|
||||
"Successfully installed colab-xterm-0.2.0\n"
|
||||
]
|
||||
|
@ -2764,7 +2789,19 @@
|
|||
},
|
||||
{
|
||||
"data": {
|
||||
"application/javascript": "\n (async () => {\n const url = new URL(await google.colab.kernel.proxyPort(10000, {'cache': true}));\n const iframe = document.createElement('iframe');\n iframe.src = url;\n iframe.setAttribute('width', '100%');\n iframe.setAttribute('height', '800');\n iframe.setAttribute('frameborder', 0);\n document.body.appendChild(iframe);\n })();\n ",
|
||||
"application/javascript": [
|
||||
"\n",
|
||||
" (async () => {\n",
|
||||
" const url = new URL(await google.colab.kernel.proxyPort(10000, {'cache': true}));\n",
|
||||
" const iframe = document.createElement('iframe');\n",
|
||||
" iframe.src = url;\n",
|
||||
" iframe.setAttribute('width', '100%');\n",
|
||||
" iframe.setAttribute('height', '800');\n",
|
||||
" iframe.setAttribute('frameborder', 0);\n",
|
||||
" document.body.appendChild(iframe);\n",
|
||||
" })();\n",
|
||||
" "
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Javascript object>"
|
||||
]
|
||||
|
@ -3523,7 +3560,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"# NBVAL_SKIP \n",
|
||||
"# NBVAL_SKIP\n",
|
||||
"print(f\"Getting traces for session_id={session_id}\")\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
|
@ -3830,7 +3867,8 @@
|
|||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
|
@ -3843,7 +3881,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
"version": "3.10.16"
|
||||
},
|
||||
"widgets": {
|
||||
"application/vnd.jupyter.widget-state+json": {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue