update notebook

This commit is contained in:
Kai Wu 2025-09-29 12:27:51 -07:00
parent ac1af2d96a
commit 5e31e32cdf

View file

@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "5b6a6a17-b931-4bea-8273-0d6e5563637a",
"metadata": {
"colab": {
@ -59,7 +59,7 @@
]
},
{
"name": "stdout",
"name": "stdin",
"output_type": "stream",
"text": [
"TOGETHER_API_KEY environment variable is not set. Please enter your API key: ········\n"
@ -109,7 +109,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "dd2dacf3-ec8b-4cc7-8ff4-b5b6ea4a6e9e",
"metadata": {
"colab": {
@ -120,68 +120,30 @@
"outputId": "aa53f96a-6826-4bfb-d1aa-2c0ec2dd4893",
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Environment '/Users/kaiwu/work/kaiwu/llama-stack/.venv' already exists, re-using it.\n",
"Virtual environment /Users/kaiwu/work/kaiwu/llama-stack/.venv is already active\n",
"\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 186ms\u001b[0m\u001b[0m\n",
"Installing pip dependencies\n",
"\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
"\u001b[2K\u001b[2mResolved \u001b[1m185 packages\u001b[0m \u001b[2min 1.29s\u001b[0m\u001b[0m \u001b[0m\n",
"\u001b[2mUninstalled \u001b[1m3 packages\u001b[0m \u001b[2min 285ms\u001b[0m\u001b[0m\n",
"\u001b[2K\u001b[2mInstalled \u001b[1m3 packages\u001b[0m \u001b[2min 105ms\u001b[0m\u001b[0m \u001b[0m\n",
" \u001b[31m-\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.5\u001b[0m\n",
" \u001b[32m+\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.4\u001b[0m\n",
" \u001b[31m-\u001b[39m \u001b[1mruamel-yaml\u001b[0m\u001b[2m==0.18.14\u001b[0m\n",
" \u001b[32m+\u001b[39m \u001b[1mruamel-yaml\u001b[0m\u001b[2m==0.17.40\u001b[0m\n",
" \u001b[31m-\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.12.5\u001b[0m\n",
" \u001b[32m+\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.9.10\u001b[0m\n",
"Installing special provider module: torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n",
"\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
"\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 66ms\u001b[0m\u001b[0m\n",
"Installing special provider module: sentence-transformers --no-deps\n",
"\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 63ms\u001b[0m\u001b[0m\n",
"Installing special provider module: torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n",
"\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
"\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 95ms\u001b[0m\u001b[0m\n",
"\u001b[32mBuild Successful!\u001b[0m\n",
"\u001b[34mYou can find the newly-built distribution here: /Users/kaiwu/.llama/distributions/starter/starter-run.yaml\u001b[0m\n",
"\u001b[32mYou can run the new Llama Stack distro via: \u001b[34mllama stack run /Users/kaiwu/.llama/distributions/starter/starter-run.yaml --image-type venv\u001b[0m\u001b[0m\n",
"Starting Llama Stack server with PID: 26783\n",
"Waiting for server to start..........\n",
"Server is ready!\n"
]
}
],
"outputs": [],
"source": [
"import os\n",
"import subprocess\n",
"import time\n",
"\n",
"\n",
"# Remove UV_SYSTEM_PYTHON to ensure uv creates a proper virtual environment\n",
"# instead of trying to use system Python globally, which could cause permission issues\n",
"# and package conflicts with the system's Python installation\n",
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
"!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
"\n",
"\n",
"def run_llama_stack_server_background():\n",
" \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" \"uv run --with llama-stack llama stack run starter --image-type venv\",\n",
" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
" text=True,\n",
" )\n",
"\n",
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
" print(f\"Building and starting Llama Stack server with PID: {process.pid}\")\n",
" return process\n",
"\n",
"\n",
@ -208,10 +170,28 @@
" return False\n",
"\n",
"\n",
"# use this helper if needed to kill the server\n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n",
" # Kill any existing llama stack server processes using pkill command\n",
" os.system(\"pkill -f llama_stack.core.server.server\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7f1494b7-938c-4338-9ae0-c463d2bc2eea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Building and starting Llama Stack server with PID: 52433\n",
"Waiting for server to start........\n",
"Server is ready!\n"
]
}
],
"source": [
"server_process = run_llama_stack_server_background()\n",
"assert wait_for_server_to_start()"
]
@ -253,9 +233,9 @@
"id": "vwhexjy1e8o"
},
"source": [
"#### Explore Available Models and Safety Features\n",
"#### Explore Available Models \n",
"\n",
"Check what models and safety shields are available through your LlamaStack instance."
"Check what models are available through your LlamaStack instance."
]
},
{
@ -268,102 +248,113 @@
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Available models:\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n",
"INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/shields \"HTTP/1.1 200 OK\"\n"
"INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"- llama_api/Llama-3.3-70B-Instruct\n",
"- llama_api/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
"- llama_api/Llama-4-Scout-17B-16E-Instruct-FP8\n",
"- meta-llama/Llama-3.3-70B-Instruct\n",
"- meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
"- meta-llama/Llama-4-Scout-17B-16E-Instruct-FP8\n",
"- fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\n",
"- fireworks/accounts/fireworks/models/llama-v3p1-70b-instruct\n",
"- fireworks/accounts/fireworks/models/llama-v3p1-405b-instruct\n",
"- fireworks/accounts/fireworks/models/llama-v3p2-3b-instruct\n",
"- fireworks/accounts/fireworks/models/llama-v3p2-11b-vision-instruct\n",
"- fireworks/accounts/fireworks/models/llama-v3p2-90b-vision-instruct\n",
"- fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct\n",
"- fireworks/accounts/fireworks/models/llama4-scout-instruct-basic\n",
"- fireworks/accounts/fireworks/models/llama4-maverick-instruct-basic\n",
"- fireworks/nomic-ai/nomic-embed-text-v1.5\n",
"- fireworks/accounts/fireworks/models/llama-guard-3-8b\n",
"- fireworks/accounts/fireworks/models/llama-guard-3-11b-vision\n",
"- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
"- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
"- together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
"- together/meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
"- together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
"- together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n",
"- together/meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
"- together/togethercomputer/m2-bert-80M-8k-retrieval\n",
"- together/togethercomputer/m2-bert-80M-32k-retrieval\n",
"- together/meta-llama/Llama-4-Scout-17B-16E-Instruct\n",
"- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
"- together/meta-llama/Llama-Guard-3-8B\n",
"- together/meta-llama/Llama-Guard-3-11B-Vision-Turbo\n",
"Available models:\n",
"- bedrock/meta.llama3-1-8b-instruct-v1:0\n",
"- bedrock/meta.llama3-1-70b-instruct-v1:0\n",
"- bedrock/meta.llama3-1-405b-instruct-v1:0\n",
"- openai/gpt-3.5-turbo-0125\n",
"- openai/gpt-3.5-turbo\n",
"- openai/gpt-3.5-turbo-instruct\n",
"- openai/gpt-4\n",
"- openai/gpt-4-turbo\n",
"- openai/gpt-4o\n",
"- openai/gpt-4o-2024-08-06\n",
"- openai/gpt-4o-mini\n",
"- openai/gpt-4o-audio-preview\n",
"- openai/chatgpt-4o-latest\n",
"- openai/o1\n",
"- openai/o1-mini\n",
"- openai/o3-mini\n",
"- openai/o4-mini\n",
"- openai/text-embedding-3-small\n",
"- openai/text-embedding-3-large\n",
"- anthropic/claude-3-5-sonnet-latest\n",
"- anthropic/claude-3-7-sonnet-latest\n",
"- anthropic/claude-3-5-haiku-latest\n",
"- anthropic/voyage-3\n",
"- anthropic/voyage-3-lite\n",
"- anthropic/voyage-code-3\n",
"- gemini/gemini-1.5-flash\n",
"- gemini/gemini-1.5-pro\n",
"- gemini/gemini-2.0-flash\n",
"- gemini/gemini-2.0-flash-lite\n",
"- gemini/gemini-2.5-flash\n",
"- gemini/gemini-2.5-flash-lite\n",
"- gemini/gemini-2.5-pro\n",
"- gemini/text-embedding-004\n",
"- groq/llama3-8b-8192\n",
"- groq/llama-3.1-8b-instant\n",
"- groq/llama3-70b-8192\n",
"- groq/llama-3.3-70b-versatile\n",
"- groq/llama-3.2-3b-preview\n",
"- groq/meta-llama/llama-4-scout-17b-16e-instruct\n",
"- groq/meta-llama/llama-4-maverick-17b-128e-instruct\n",
"- sambanova/Meta-Llama-3.1-8B-Instruct\n",
"- sambanova/Meta-Llama-3.3-70B-Instruct\n",
"- sambanova/Llama-4-Maverick-17B-128E-Instruct\n",
"- sentence-transformers/all-MiniLM-L6-v2\n",
"----\n",
"Available shields (safety models):\n",
"- together/Alibaba-NLP/gte-modernbert-base\n",
"- together/arcee-ai/AFM-4.5B\n",
"- together/arcee-ai/coder-large\n",
"- together/arcee-ai/maestro-reasoning\n",
"- together/arcee-ai/virtuoso-large\n",
"- together/arcee_ai/arcee-spotlight\n",
"- together/arize-ai/qwen-2-1.5b-instruct\n",
"- together/BAAI/bge-base-en-v1.5\n",
"- together/BAAI/bge-large-en-v1.5\n",
"- together/black-forest-labs/FLUX.1-dev\n",
"- together/black-forest-labs/FLUX.1-dev-lora\n",
"- together/black-forest-labs/FLUX.1-kontext-dev\n",
"- together/black-forest-labs/FLUX.1-kontext-max\n",
"- together/black-forest-labs/FLUX.1-kontext-pro\n",
"- together/black-forest-labs/FLUX.1-krea-dev\n",
"- together/black-forest-labs/FLUX.1-pro\n",
"- together/black-forest-labs/FLUX.1-schnell\n",
"- together/black-forest-labs/FLUX.1-schnell-Free\n",
"- together/black-forest-labs/FLUX.1.1-pro\n",
"- together/cartesia/sonic\n",
"- together/cartesia/sonic-2\n",
"- together/deepcogito/cogito-v2-preview-deepseek-671b\n",
"- together/deepcogito/cogito-v2-preview-llama-109B-MoE\n",
"- together/deepcogito/cogito-v2-preview-llama-405B\n",
"- together/deepcogito/cogito-v2-preview-llama-70B\n",
"- together/deepseek-ai/DeepSeek-R1\n",
"- together/deepseek-ai/DeepSeek-R1-0528-tput\n",
"- together/deepseek-ai/DeepSeek-R1-Distill-Llama-70B\n",
"- together/deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free\n",
"- together/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B\n",
"- together/deepseek-ai/DeepSeek-V3\n",
"- together/deepseek-ai/DeepSeek-V3.1\n",
"- together/google/gemma-3n-E4B-it\n",
"- together/intfloat/multilingual-e5-large-instruct\n",
"- together/lgai/exaone-3-5-32b-instruct\n",
"- together/lgai/exaone-deep-32b\n",
"- together/marin-community/marin-8b-instruct\n",
"- together/meta-llama/Llama-2-70b-hf\n",
"- together/meta-llama/Llama-3-70b-chat-hf\n",
"- together/meta-llama/Llama-3-70b-hf\n",
"- together/meta-llama/Llama-3.1-405B-Instruct\n",
"- together/meta-llama/Llama-3.2-1B-Instruct\n",
"- together/meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
"- together/meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
"- together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free\n",
"- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
"- together/meta-llama/Llama-4-Scout-17B-16E-Instruct\n",
"- together/meta-llama/Llama-Guard-3-11B-Vision-Turbo\n",
"- together/meta-llama/Llama-Guard-4-12B\n",
"- together/meta-llama/LlamaGuard-2-8b\n",
"- together/meta-llama/Meta-Llama-3-70B-Instruct-Turbo\n",
"- together/meta-llama/Meta-Llama-3-8B-Instruct\n",
"- together/meta-llama/Meta-Llama-3-8B-Instruct-Lite\n",
"- together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
"- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Reference\n",
"- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
"- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Reference\n",
"- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
"- together/meta-llama/Meta-Llama-Guard-3-8B\n",
"- together/mistralai/Mistral-7B-Instruct-v0.1\n",
"- together/mistralai/Mistral-7B-Instruct-v0.2\n",
"- together/mistralai/Mistral-7B-Instruct-v0.3\n",
"- together/mistralai/Mistral-Small-24B-Instruct-2501\n",
"- together/mistralai/Mixtral-8x7B-Instruct-v0.1\n",
"- together/mixedbread-ai/Mxbai-Rerank-Large-V2\n",
"- together/moonshotai/Kimi-K2-Instruct\n",
"- together/moonshotai/Kimi-K2-Instruct-0905\n",
"- together/openai/gpt-oss-120b\n",
"- together/openai/gpt-oss-20b\n",
"- together/openai/whisper-large-v3\n",
"- together/Qwen/Qwen2.5-72B-Instruct\n",
"- together/Qwen/Qwen2.5-72B-Instruct-Turbo\n",
"- together/Qwen/Qwen2.5-7B-Instruct-Turbo\n",
"- together/Qwen/Qwen2.5-Coder-32B-Instruct\n",
"- together/Qwen/Qwen2.5-VL-72B-Instruct\n",
"- together/Qwen/Qwen3-235B-A22B-fp8-tput\n",
"- together/Qwen/Qwen3-235B-A22B-Instruct-2507-tput\n",
"- together/Qwen/Qwen3-235B-A22B-Thinking-2507\n",
"- together/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8\n",
"- together/Qwen/Qwen3-Next-80B-A3B-Instruct\n",
"- together/Qwen/Qwen3-Next-80B-A3B-Thinking\n",
"- together/Qwen/QwQ-32B\n",
"- together/Salesforce/Llama-Rank-V1\n",
"- together/scb10x/scb10x-typhoon-2-1-gemma3-12b\n",
"- together/togethercomputer/m2-bert-80M-32k-retrieval\n",
"- together/togethercomputer/MoA-1\n",
"- together/togethercomputer/MoA-1-Turbo\n",
"- together/togethercomputer/Refuel-Llm-V2\n",
"- together/togethercomputer/Refuel-Llm-V2-Small\n",
"- together/Virtue-AI/VirtueGuard-Text-Lite\n",
"- together/zai-org/GLM-4.5-Air-FP8\n",
"----\n"
]
}
@ -373,10 +364,6 @@
"for m in client.models.list():\n",
" print(f\"- {m.identifier}\")\n",
"\n",
"print(\"----\")\n",
"print(\"Available shields (safety models):\")\n",
"for s in client.shields.list():\n",
" print(s.identifier)\n",
"print(\"----\")"
]
},