Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-03 19:57:35 +00:00
update notebook

This commit is contained in:
parent ac1af2d96a
commit 5e31e32cdf

1 changed file with 126 additions and 139 deletions
@@ -38,7 +38,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
 "id": "5b6a6a17-b931-4bea-8273-0d6e5563637a",
 "metadata": {
 "colab": {
@@ -59,7 +59,7 @@
 ]
 },
 {
- "name": "stdout",
+ "name": "stdin",
 "output_type": "stream",
 "text": [
 "TOGETHER_API_KEY environment variable is not set. Please enter your API key: ········\n"
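The cell that emits this prompt sits outside the hunk; a minimal sketch of the pattern the output implies, assuming the notebook reads the key with getpass:

import os
from getpass import getpass

# Hypothetical reconstruction: ask for the key only when it is not already set.
if "TOGETHER_API_KEY" not in os.environ:
    os.environ["TOGETHER_API_KEY"] = getpass(
        "TOGETHER_API_KEY environment variable is not set. Please enter your API key: "
    )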
@@ -109,7 +109,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
 "id": "dd2dacf3-ec8b-4cc7-8ff4-b5b6ea4a6e9e",
 "metadata": {
 "colab": {
@@ -120,68 +120,30 @@
 "outputId": "aa53f96a-6826-4bfb-d1aa-2c0ec2dd4893",
 "scrolled": true
 },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Environment '/Users/kaiwu/work/kaiwu/llama-stack/.venv' already exists, re-using it.\n",
- "Virtual environment /Users/kaiwu/work/kaiwu/llama-stack/.venv is already active\n",
- "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
- "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 186ms\u001b[0m\u001b[0m\n",
- "Installing pip dependencies\n",
- "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
- "\u001b[2K\u001b[2mResolved \u001b[1m185 packages\u001b[0m \u001b[2min 1.29s\u001b[0m\u001b[0m \u001b[0m\n",
- "\u001b[2mUninstalled \u001b[1m3 packages\u001b[0m \u001b[2min 285ms\u001b[0m\u001b[0m\n",
- "\u001b[2K\u001b[2mInstalled \u001b[1m3 packages\u001b[0m \u001b[2min 105ms\u001b[0m\u001b[0m \u001b[0m\n",
- " \u001b[31m-\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.5\u001b[0m\n",
- " \u001b[32m+\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.4\u001b[0m\n",
- " \u001b[31m-\u001b[39m \u001b[1mruamel-yaml\u001b[0m\u001b[2m==0.18.14\u001b[0m\n",
- " \u001b[32m+\u001b[39m \u001b[1mruamel-yaml\u001b[0m\u001b[2m==0.17.40\u001b[0m\n",
- " \u001b[31m-\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.12.5\u001b[0m\n",
- " \u001b[32m+\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.9.10\u001b[0m\n",
- "Installing special provider module: torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n",
- "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
- "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 66ms\u001b[0m\u001b[0m\n",
- "Installing special provider module: sentence-transformers --no-deps\n",
- "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
- "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 63ms\u001b[0m\u001b[0m\n",
- "Installing special provider module: torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n",
- "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
- "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 95ms\u001b[0m\u001b[0m\n",
- "\u001b[32mBuild Successful!\u001b[0m\n",
- "\u001b[34mYou can find the newly-built distribution here: /Users/kaiwu/.llama/distributions/starter/starter-run.yaml\u001b[0m\n",
- "\u001b[32mYou can run the new Llama Stack distro via: \u001b[34mllama stack run /Users/kaiwu/.llama/distributions/starter/starter-run.yaml --image-type venv\u001b[0m\u001b[0m\n",
- "Starting Llama Stack server with PID: 26783\n",
- "Waiting for server to start..........\n",
- "Server is ready!\n"
- ]
- }
- ],
+ "outputs": [],
 "source": [
 "import os\n",
 "import subprocess\n",
 "import time\n",
 "\n",
 "\n",
 "# Remove UV_SYSTEM_PYTHON to ensure uv creates a proper virtual environment\n",
 "# instead of trying to use system Python globally, which could cause permission issues\n",
 "# and package conflicts with the system's Python installation\n",
 "if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
 "!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
 "\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
 " process = subprocess.Popen(\n",
- " \"uv run --with llama-stack llama stack run starter --image-type venv\",\n",
+ " \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
 " shell=True,\n",
 " stdout=log_file,\n",
 " stderr=log_file,\n",
 " text=True,\n",
 " )\n",
 "\n",
- " print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
+ " print(f\"Building and starting Llama Stack server with PID: {process.pid}\")\n",
 " return process\n",
 "\n",
 "\n",
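The new --run flag makes the Popen command build the distro and launch the server in one step, so build output now lands in llama_stack_server.log instead of the cell. A minimal sketch for peeking at that log from the notebook (the helper name is ours, not the notebook's):

def tail_server_log(path="llama_stack_server.log", n=20):
    # Print the last n lines of the build/server log written by the
    # subprocess.Popen call above.
    with open(path) as f:
        for line in f.readlines()[-n:]:
            print(line, end="")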
@@ -208,10 +170,28 @@
 " return False\n",
 "\n",
 "\n",
 "# use this helper if needed to kill the server\n",
 "def kill_llama_stack_server():\n",
- " # Kill any existing llama stack server processes\n",
- " os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n",
+ " # Kill any existing llama stack server processes using pkill command\n",
+ " os.system(\"pkill -f llama_stack.core.server.server\")"
 ]
 },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "7f1494b7-938c-4338-9ae0-c463d2bc2eea",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Building and starting Llama Stack server with PID: 52433\n",
+ "Waiting for server to start........\n",
+ "Server is ready!\n"
+ ]
+ }
+ ],
+ "source": [
+ "server_process = run_llama_stack_server_background()\n",
+ "assert wait_for_server_to_start()"
+ ]
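The body of wait_for_server_to_start is outside this hunk; a sketch consistent with its "Waiting for server to start..." dots and its return False fallback, assuming the stack's /v1/health endpoint on port 8321:

import time
import requests

def wait_for_server_to_start(url="http://localhost:8321/v1/health", timeout=180):
    # Poll the (assumed) health endpoint until the server answers or we give up.
    print("Waiting for server to start", end="")
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            if requests.get(url, timeout=2).status_code == 200:
                print("\nServer is ready!")
                return True
        except requests.exceptions.RequestException:
            pass  # server socket not open yet
        print(".", end="", flush=True)
        time.sleep(2)
    return False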
@@ -253,9 +233,9 @@
 "id": "vwhexjy1e8o"
 },
 "source": [
- "#### Explore Available Models and Safety Features\n",
+ "#### Explore Available Models \n",
 "\n",
- "Check what models and safety shields are available through your LlamaStack instance."
+ "Check what models are available through your LlamaStack instance."
 ]
 },
 {
@@ -268,102 +248,113 @@
 "scrolled": true
 },
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
 "Available models:\n"
 ]
 },
 {
 "name": "stderr",
 "output_type": "stream",
 "text": [
- "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n",
- "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/shields \"HTTP/1.1 200 OK\"\n"
+ "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n"
 ]
 },
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
- "- llama_api/Llama-3.3-70B-Instruct\n",
- "- llama_api/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
- "- llama_api/Llama-4-Scout-17B-16E-Instruct-FP8\n",
- "- meta-llama/Llama-3.3-70B-Instruct\n",
- "- meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
- "- meta-llama/Llama-4-Scout-17B-16E-Instruct-FP8\n",
- "- fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p1-70b-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p1-405b-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p2-3b-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p2-11b-vision-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p2-90b-vision-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct\n",
- "- fireworks/accounts/fireworks/models/llama4-scout-instruct-basic\n",
- "- fireworks/accounts/fireworks/models/llama4-maverick-instruct-basic\n",
- "- fireworks/nomic-ai/nomic-embed-text-v1.5\n",
- "- fireworks/accounts/fireworks/models/llama-guard-3-8b\n",
- "- fireworks/accounts/fireworks/models/llama-guard-3-11b-vision\n",
- "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
- "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
- "- together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
- "- together/meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
- "- together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
- "- together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n",
- "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
- "- together/togethercomputer/m2-bert-80M-8k-retrieval\n",
- "- together/togethercomputer/m2-bert-80M-32k-retrieval\n",
- "- together/meta-llama/Llama-4-Scout-17B-16E-Instruct\n",
- "- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
- "- together/meta-llama/Llama-Guard-3-8B\n",
- "- together/meta-llama/Llama-Guard-3-11B-Vision-Turbo\n",
- "----\n",
- "Available shields (safety models):\n",
+ "Available models:\n",
+ "- bedrock/meta.llama3-1-8b-instruct-v1:0\n",
+ "- bedrock/meta.llama3-1-70b-instruct-v1:0\n",
+ "- bedrock/meta.llama3-1-405b-instruct-v1:0\n",
+ "- openai/gpt-3.5-turbo-0125\n",
+ "- openai/gpt-3.5-turbo\n",
+ "- openai/gpt-3.5-turbo-instruct\n",
+ "- openai/gpt-4\n",
+ "- openai/gpt-4-turbo\n",
+ "- openai/gpt-4o\n",
+ "- openai/gpt-4o-2024-08-06\n",
+ "- openai/gpt-4o-mini\n",
+ "- openai/gpt-4o-audio-preview\n",
+ "- openai/chatgpt-4o-latest\n",
+ "- openai/o1\n",
+ "- openai/o1-mini\n",
+ "- openai/o3-mini\n",
+ "- openai/o4-mini\n",
+ "- openai/text-embedding-3-small\n",
+ "- openai/text-embedding-3-large\n",
+ "- anthropic/claude-3-5-sonnet-latest\n",
+ "- anthropic/claude-3-7-sonnet-latest\n",
+ "- anthropic/claude-3-5-haiku-latest\n",
+ "- anthropic/voyage-3\n",
+ "- anthropic/voyage-3-lite\n",
+ "- anthropic/voyage-code-3\n",
+ "- gemini/gemini-1.5-flash\n",
+ "- gemini/gemini-1.5-pro\n",
+ "- gemini/gemini-2.0-flash\n",
+ "- gemini/gemini-2.0-flash-lite\n",
+ "- gemini/gemini-2.5-flash\n",
+ "- gemini/gemini-2.5-flash-lite\n",
+ "- gemini/gemini-2.5-pro\n",
+ "- gemini/text-embedding-004\n",
+ "- groq/llama3-8b-8192\n",
+ "- groq/llama-3.1-8b-instant\n",
+ "- groq/llama3-70b-8192\n",
+ "- groq/llama-3.3-70b-versatile\n",
+ "- groq/llama-3.2-3b-preview\n",
+ "- groq/meta-llama/llama-4-scout-17b-16e-instruct\n",
+ "- groq/meta-llama/llama-4-maverick-17b-128e-instruct\n",
+ "- sambanova/Meta-Llama-3.1-8B-Instruct\n",
+ "- sambanova/Meta-Llama-3.3-70B-Instruct\n",
+ "- sambanova/Llama-4-Maverick-17B-128E-Instruct\n",
+ "- sentence-transformers/all-MiniLM-L6-v2\n",
+ "- together/Alibaba-NLP/gte-modernbert-base\n",
+ "- together/arcee-ai/AFM-4.5B\n",
+ "- together/arcee-ai/coder-large\n",
+ "- together/arcee-ai/maestro-reasoning\n",
+ "- together/arcee-ai/virtuoso-large\n",
+ "- together/arcee_ai/arcee-spotlight\n",
+ "- together/arize-ai/qwen-2-1.5b-instruct\n",
+ "- together/BAAI/bge-base-en-v1.5\n",
+ "- together/BAAI/bge-large-en-v1.5\n",
+ "- together/black-forest-labs/FLUX.1-dev\n",
+ "- together/black-forest-labs/FLUX.1-dev-lora\n",
+ "- together/black-forest-labs/FLUX.1-kontext-dev\n",
+ "- together/black-forest-labs/FLUX.1-kontext-max\n",
+ "- together/black-forest-labs/FLUX.1-kontext-pro\n",
+ "- together/black-forest-labs/FLUX.1-krea-dev\n",
+ "- together/black-forest-labs/FLUX.1-pro\n",
+ "- together/black-forest-labs/FLUX.1-schnell\n",
+ "- together/black-forest-labs/FLUX.1-schnell-Free\n",
+ "- together/black-forest-labs/FLUX.1.1-pro\n",
+ "- together/cartesia/sonic\n",
+ "- together/cartesia/sonic-2\n",
+ "- together/deepcogito/cogito-v2-preview-deepseek-671b\n",
+ "- together/deepcogito/cogito-v2-preview-llama-109B-MoE\n",
+ "- together/deepcogito/cogito-v2-preview-llama-405B\n",
+ "- together/deepcogito/cogito-v2-preview-llama-70B\n",
+ "- together/deepseek-ai/DeepSeek-R1\n",
+ "- together/deepseek-ai/DeepSeek-R1-0528-tput\n",
+ "- together/deepseek-ai/DeepSeek-R1-Distill-Llama-70B\n",
+ "- together/deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free\n",
+ "- together/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B\n",
+ "- together/deepseek-ai/DeepSeek-V3\n",
+ "- together/deepseek-ai/DeepSeek-V3.1\n",
+ "- together/google/gemma-3n-E4B-it\n",
+ "- together/intfloat/multilingual-e5-large-instruct\n",
+ "- together/lgai/exaone-3-5-32b-instruct\n",
+ "- together/lgai/exaone-deep-32b\n",
+ "- together/marin-community/marin-8b-instruct\n",
+ "- together/meta-llama/Llama-2-70b-hf\n",
+ "- together/meta-llama/Llama-3-70b-chat-hf\n",
+ "- together/meta-llama/Llama-3-70b-hf\n",
+ "- together/meta-llama/Llama-3.1-405B-Instruct\n",
+ "- together/meta-llama/Llama-3.2-1B-Instruct\n",
+ "- together/meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
+ "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
+ "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free\n",
+ "- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
+ "- together/meta-llama/Llama-4-Scout-17B-16E-Instruct\n",
+ "- together/meta-llama/Llama-Guard-3-11B-Vision-Turbo\n",
+ "- together/meta-llama/Llama-Guard-4-12B\n",
+ "- together/meta-llama/LlamaGuard-2-8b\n",
+ "- together/meta-llama/Meta-Llama-3-70B-Instruct-Turbo\n",
+ "- together/meta-llama/Meta-Llama-3-8B-Instruct\n",
+ "- together/meta-llama/Meta-Llama-3-8B-Instruct-Lite\n",
+ "- together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
+ "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Reference\n",
+ "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
+ "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Reference\n",
+ "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
+ "- together/meta-llama/Meta-Llama-Guard-3-8B\n",
+ "- together/mistralai/Mistral-7B-Instruct-v0.1\n",
+ "- together/mistralai/Mistral-7B-Instruct-v0.2\n",
+ "- together/mistralai/Mistral-7B-Instruct-v0.3\n",
+ "- together/mistralai/Mistral-Small-24B-Instruct-2501\n",
+ "- together/mistralai/Mixtral-8x7B-Instruct-v0.1\n",
+ "- together/mixedbread-ai/Mxbai-Rerank-Large-V2\n",
+ "- together/moonshotai/Kimi-K2-Instruct\n",
+ "- together/moonshotai/Kimi-K2-Instruct-0905\n",
+ "- together/openai/gpt-oss-120b\n",
+ "- together/openai/gpt-oss-20b\n",
+ "- together/openai/whisper-large-v3\n",
+ "- together/Qwen/Qwen2.5-72B-Instruct\n",
+ "- together/Qwen/Qwen2.5-72B-Instruct-Turbo\n",
+ "- together/Qwen/Qwen2.5-7B-Instruct-Turbo\n",
+ "- together/Qwen/Qwen2.5-Coder-32B-Instruct\n",
+ "- together/Qwen/Qwen2.5-VL-72B-Instruct\n",
+ "- together/Qwen/Qwen3-235B-A22B-fp8-tput\n",
+ "- together/Qwen/Qwen3-235B-A22B-Instruct-2507-tput\n",
+ "- together/Qwen/Qwen3-235B-A22B-Thinking-2507\n",
+ "- together/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8\n",
+ "- together/Qwen/Qwen3-Next-80B-A3B-Instruct\n",
+ "- together/Qwen/Qwen3-Next-80B-A3B-Thinking\n",
+ "- together/Qwen/QwQ-32B\n",
+ "- together/Salesforce/Llama-Rank-V1\n",
+ "- together/scb10x/scb10x-typhoon-2-1-gemma3-12b\n",
+ "- together/togethercomputer/m2-bert-80M-32k-retrieval\n",
+ "- together/togethercomputer/MoA-1\n",
+ "- together/togethercomputer/MoA-1-Turbo\n",
+ "- together/togethercomputer/Refuel-Llm-V2\n",
+ "- together/togethercomputer/Refuel-Llm-V2-Small\n",
+ "- together/Virtue-AI/VirtueGuard-Text-Lite\n",
+ "- together/zai-org/GLM-4.5-Air-FP8\n",
+ "----\n"
 ]
 }
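With the starter distro now surfacing Together's full catalog, the models list is much longer; a minimal sketch for narrowing it down programmatically, using the same client calls as the source below (assumes the server from the earlier cells is running):

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
# Keep only together-hosted Llama 4 variants from the full catalog.
llama4_ids = [
    m.identifier
    for m in client.models.list()
    if m.identifier.startswith("together/meta-llama/Llama-4")
]
print(llama4_ids or "no Llama 4 models registered")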
@@ -373,10 +364,6 @@
 "for m in client.models.list():\n",
 " print(f\"- {m.identifier}\")\n",
 "\n",
- "print(\"----\")\n",
- "print(\"Available shields (safety models):\")\n",
- "for s in client.shields.list():\n",
- " print(s.identifier)\n",
 "print(\"----\")"
 ]
 },
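The shields listing removed here is still available from the client if safety models are needed later; a one-off sketch reusing the deleted calls (assumes the same client object as the cell above):

# Same API the removed lines used; run ad hoc rather than in the model tour.
for s in client.shields.list():
    print(f"- {s.identifier}")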