Update CrewAI notebook: build and run Llama Stack server in one step, remove shields listing

2025-10-03 19:57:35 +00:00 · 2025-09-29 12:27:51 -07:00 · 2025-09-29 12:27:51 -07:00 · 5e31e32cdf
commit 5e31e32cdf
parent ac1af2d96a
1 changed files with 126 additions and 139 deletions
--- a/docs/notebooks/crewai/Llama_Stack_CrewAI.ipynb
+++ b/docs/notebooks/crewai/Llama_Stack_CrewAI.ipynb
@ -38,7 +38,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
   "id": "5b6a6a17-b931-4bea-8273-0d6e5563637a",
   "metadata": {
    "colab": {
@ -59,7 +59,7 @@
     ]
    },
    {
-     "name": "stdout",
+     "name": "stdin",
     "output_type": "stream",
     "text": [
      "TOGETHER_API_KEY environment variable is not set. Please enter your API key:  ········\n"
@ -109,7 +109,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "id": "dd2dacf3-ec8b-4cc7-8ff4-b5b6ea4a6e9e",
   "metadata": {
    "colab": {
@ -120,68 +120,30 @@
    "outputId": "aa53f96a-6826-4bfb-d1aa-2c0ec2dd4893",
    "scrolled": true
   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Environment '/Users/kaiwu/work/kaiwu/llama-stack/.venv' already exists, re-using it.\n",
-      "Virtual environment /Users/kaiwu/work/kaiwu/llama-stack/.venv is already active\n",
-      "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
-      "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 186ms\u001b[0m\u001b[0m\n",
-      "Installing pip dependencies\n",
-      "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
-      "\u001b[2K\u001b[2mResolved \u001b[1m185 packages\u001b[0m \u001b[2min 1.29s\u001b[0m\u001b[0m                                       \u001b[0m\n",
-      "\u001b[2mUninstalled \u001b[1m3 packages\u001b[0m \u001b[2min 285ms\u001b[0m\u001b[0m\n",
-      "\u001b[2K\u001b[2mInstalled \u001b[1m3 packages\u001b[0m \u001b[2min 105ms\u001b[0m\u001b[0m                               \u001b[0m\n",
-      " \u001b[31m-\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.5\u001b[0m\n",
-      " \u001b[32m+\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.4\u001b[0m\n",
-      " \u001b[31m-\u001b[39m \u001b[1mruamel-yaml\u001b[0m\u001b[2m==0.18.14\u001b[0m\n",
-      " \u001b[32m+\u001b[39m \u001b[1mruamel-yaml\u001b[0m\u001b[2m==0.17.40\u001b[0m\n",
-      " \u001b[31m-\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.12.5\u001b[0m\n",
-      " \u001b[32m+\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.9.10\u001b[0m\n",
-      "Installing special provider module: torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n",
-      "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
-      "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 66ms\u001b[0m\u001b[0m\n",
-      "Installing special provider module: sentence-transformers --no-deps\n",
-      "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
-      "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 63ms\u001b[0m\u001b[0m\n",
-      "Installing special provider module: torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n",
-      "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n",
-      "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 95ms\u001b[0m\u001b[0m\n",
-      "\u001b[32mBuild Successful!\u001b[0m\n",
-      "\u001b[34mYou can find the newly-built distribution here: /Users/kaiwu/.llama/distributions/starter/starter-run.yaml\u001b[0m\n",
-      "\u001b[32mYou can run the new Llama Stack distro via: \u001b[34mllama stack run /Users/kaiwu/.llama/distributions/starter/starter-run.yaml --image-type venv\u001b[0m\u001b[0m\n",
-      "Starting Llama Stack server with PID: 26783\n",
-      "Waiting for server to start..........\n",
-      "Server is ready!\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "import os\n",
    "import subprocess\n",
    "import time\n",
    "\n",
-    "\n",
+    "# Remove UV_SYSTEM_PYTHON to ensure uv creates a proper virtual environment\n",
+    "# instead of trying to use system Python globally, which could cause permission issues\n",
+    "# and package conflicts with the system's Python installation\n",
    "if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
    "    del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
    "\n",
-    "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
-    "!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
-    "\n",
-    "\n",
    "def run_llama_stack_server_background():\n",
+    "    \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
    "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
    "    process = subprocess.Popen(\n",
-    "        \"uv run --with llama-stack llama stack run starter --image-type venv\",\n",
+    "        \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
    "        shell=True,\n",
    "        stdout=log_file,\n",
    "        stderr=log_file,\n",
    "        text=True,\n",
    "    )\n",
    "\n",
-    "    print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
+    "    print(f\"Building and starting Llama Stack server with PID: {process.pid}\")\n",
    "    return process\n",
    "\n",
    "\n",
@ -208,10 +170,28 @@
    "    return False\n",
    "\n",
    "\n",
-    "# use this helper if needed to kill the server\n",
    "def kill_llama_stack_server():\n",
-    "    # Kill any existing llama stack server processes\n",
-    "    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n",
+    "    # Kill any existing llama stack server processes using pkill command\n",
+    "    os.system(\"pkill -f llama_stack.core.server.server\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "7f1494b7-938c-4338-9ae0-c463d2bc2eea",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Building and starting Llama Stack server with PID: 52433\n",
+      "Waiting for server to start........\n",
+      "Server is ready!\n"
+     ]
+    }
+   ],
+   "source": [
    "server_process = run_llama_stack_server_background()\n",
    "assert wait_for_server_to_start()"
   ]
@ -253,9 +233,9 @@
    "id": "vwhexjy1e8o"
   },
   "source": [
-    "#### Explore Available Models and Safety Features\n",
+    "#### Explore Available Models\n",
    "\n",
-    "Check what models and safety shields are available through your LlamaStack instance."
+    "Check what models are available through your LlamaStack instance."
   ]
  },
  {
@ -268,102 +248,113 @@
    "scrolled": true
   },
   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Available models:\n"
-     ]
-    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/shields \"HTTP/1.1 200 OK\"\n"
+      "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "- llama_api/Llama-3.3-70B-Instruct\n",
-      "- llama_api/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
-      "- llama_api/Llama-4-Scout-17B-16E-Instruct-FP8\n",
-      "- meta-llama/Llama-3.3-70B-Instruct\n",
-      "- meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
-      "- meta-llama/Llama-4-Scout-17B-16E-Instruct-FP8\n",
-      "- fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\n",
-      "- fireworks/accounts/fireworks/models/llama-v3p1-70b-instruct\n",
-      "- fireworks/accounts/fireworks/models/llama-v3p1-405b-instruct\n",
-      "- fireworks/accounts/fireworks/models/llama-v3p2-3b-instruct\n",
-      "- fireworks/accounts/fireworks/models/llama-v3p2-11b-vision-instruct\n",
-      "- fireworks/accounts/fireworks/models/llama-v3p2-90b-vision-instruct\n",
-      "- fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct\n",
-      "- fireworks/accounts/fireworks/models/llama4-scout-instruct-basic\n",
-      "- fireworks/accounts/fireworks/models/llama4-maverick-instruct-basic\n",
-      "- fireworks/nomic-ai/nomic-embed-text-v1.5\n",
-      "- fireworks/accounts/fireworks/models/llama-guard-3-8b\n",
-      "- fireworks/accounts/fireworks/models/llama-guard-3-11b-vision\n",
-      "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
-      "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
-      "- together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
-      "- together/meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
-      "- together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
-      "- together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n",
-      "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
-      "- together/togethercomputer/m2-bert-80M-8k-retrieval\n",
-      "- together/togethercomputer/m2-bert-80M-32k-retrieval\n",
-      "- together/meta-llama/Llama-4-Scout-17B-16E-Instruct\n",
-      "- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
-      "- together/meta-llama/Llama-Guard-3-8B\n",
-      "- together/meta-llama/Llama-Guard-3-11B-Vision-Turbo\n",
+      "Available models:\n",
      "- bedrock/meta.llama3-1-8b-instruct-v1:0\n",
      "- bedrock/meta.llama3-1-70b-instruct-v1:0\n",
      "- bedrock/meta.llama3-1-405b-instruct-v1:0\n",
-      "- openai/gpt-3.5-turbo-0125\n",
-      "- openai/gpt-3.5-turbo\n",
-      "- openai/gpt-3.5-turbo-instruct\n",
-      "- openai/gpt-4\n",
-      "- openai/gpt-4-turbo\n",
-      "- openai/gpt-4o\n",
-      "- openai/gpt-4o-2024-08-06\n",
-      "- openai/gpt-4o-mini\n",
-      "- openai/gpt-4o-audio-preview\n",
-      "- openai/chatgpt-4o-latest\n",
-      "- openai/o1\n",
-      "- openai/o1-mini\n",
-      "- openai/o3-mini\n",
-      "- openai/o4-mini\n",
-      "- openai/text-embedding-3-small\n",
-      "- openai/text-embedding-3-large\n",
-      "- anthropic/claude-3-5-sonnet-latest\n",
-      "- anthropic/claude-3-7-sonnet-latest\n",
-      "- anthropic/claude-3-5-haiku-latest\n",
-      "- anthropic/voyage-3\n",
-      "- anthropic/voyage-3-lite\n",
-      "- anthropic/voyage-code-3\n",
-      "- gemini/gemini-1.5-flash\n",
-      "- gemini/gemini-1.5-pro\n",
-      "- gemini/gemini-2.0-flash\n",
-      "- gemini/gemini-2.0-flash-lite\n",
-      "- gemini/gemini-2.5-flash\n",
-      "- gemini/gemini-2.5-flash-lite\n",
-      "- gemini/gemini-2.5-pro\n",
-      "- gemini/text-embedding-004\n",
-      "- groq/llama3-8b-8192\n",
-      "- groq/llama-3.1-8b-instant\n",
-      "- groq/llama3-70b-8192\n",
-      "- groq/llama-3.3-70b-versatile\n",
-      "- groq/llama-3.2-3b-preview\n",
-      "- groq/meta-llama/llama-4-scout-17b-16e-instruct\n",
-      "- groq/meta-llama/llama-4-maverick-17b-128e-instruct\n",
-      "- sambanova/Meta-Llama-3.1-8B-Instruct\n",
-      "- sambanova/Meta-Llama-3.3-70B-Instruct\n",
-      "- sambanova/Llama-4-Maverick-17B-128E-Instruct\n",
      "- sentence-transformers/all-MiniLM-L6-v2\n",
-      "----\n",
-      "Available shields (safety models):\n",
+      "- together/Alibaba-NLP/gte-modernbert-base\n",
+      "- together/arcee-ai/AFM-4.5B\n",
+      "- together/arcee-ai/coder-large\n",
+      "- together/arcee-ai/maestro-reasoning\n",
+      "- together/arcee-ai/virtuoso-large\n",
+      "- together/arcee_ai/arcee-spotlight\n",
+      "- together/arize-ai/qwen-2-1.5b-instruct\n",
+      "- together/BAAI/bge-base-en-v1.5\n",
+      "- together/BAAI/bge-large-en-v1.5\n",
+      "- together/black-forest-labs/FLUX.1-dev\n",
+      "- together/black-forest-labs/FLUX.1-dev-lora\n",
+      "- together/black-forest-labs/FLUX.1-kontext-dev\n",
+      "- together/black-forest-labs/FLUX.1-kontext-max\n",
+      "- together/black-forest-labs/FLUX.1-kontext-pro\n",
+      "- together/black-forest-labs/FLUX.1-krea-dev\n",
+      "- together/black-forest-labs/FLUX.1-pro\n",
+      "- together/black-forest-labs/FLUX.1-schnell\n",
+      "- together/black-forest-labs/FLUX.1-schnell-Free\n",
+      "- together/black-forest-labs/FLUX.1.1-pro\n",
+      "- together/cartesia/sonic\n",
+      "- together/cartesia/sonic-2\n",
+      "- together/deepcogito/cogito-v2-preview-deepseek-671b\n",
+      "- together/deepcogito/cogito-v2-preview-llama-109B-MoE\n",
+      "- together/deepcogito/cogito-v2-preview-llama-405B\n",
+      "- together/deepcogito/cogito-v2-preview-llama-70B\n",
+      "- together/deepseek-ai/DeepSeek-R1\n",
+      "- together/deepseek-ai/DeepSeek-R1-0528-tput\n",
+      "- together/deepseek-ai/DeepSeek-R1-Distill-Llama-70B\n",
+      "- together/deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free\n",
+      "- together/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B\n",
+      "- together/deepseek-ai/DeepSeek-V3\n",
+      "- together/deepseek-ai/DeepSeek-V3.1\n",
+      "- together/google/gemma-3n-E4B-it\n",
+      "- together/intfloat/multilingual-e5-large-instruct\n",
+      "- together/lgai/exaone-3-5-32b-instruct\n",
+      "- together/lgai/exaone-deep-32b\n",
+      "- together/marin-community/marin-8b-instruct\n",
+      "- together/meta-llama/Llama-2-70b-hf\n",
+      "- together/meta-llama/Llama-3-70b-chat-hf\n",
+      "- together/meta-llama/Llama-3-70b-hf\n",
+      "- together/meta-llama/Llama-3.1-405B-Instruct\n",
+      "- together/meta-llama/Llama-3.2-1B-Instruct\n",
+      "- together/meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
+      "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
+      "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free\n",
+      "- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
+      "- together/meta-llama/Llama-4-Scout-17B-16E-Instruct\n",
+      "- together/meta-llama/Llama-Guard-3-11B-Vision-Turbo\n",
+      "- together/meta-llama/Llama-Guard-4-12B\n",
+      "- together/meta-llama/LlamaGuard-2-8b\n",
+      "- together/meta-llama/Meta-Llama-3-70B-Instruct-Turbo\n",
+      "- together/meta-llama/Meta-Llama-3-8B-Instruct\n",
+      "- together/meta-llama/Meta-Llama-3-8B-Instruct-Lite\n",
+      "- together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
+      "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Reference\n",
+      "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
+      "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Reference\n",
+      "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
+      "- together/meta-llama/Meta-Llama-Guard-3-8B\n",
+      "- together/mistralai/Mistral-7B-Instruct-v0.1\n",
+      "- together/mistralai/Mistral-7B-Instruct-v0.2\n",
+      "- together/mistralai/Mistral-7B-Instruct-v0.3\n",
+      "- together/mistralai/Mistral-Small-24B-Instruct-2501\n",
+      "- together/mistralai/Mixtral-8x7B-Instruct-v0.1\n",
+      "- together/mixedbread-ai/Mxbai-Rerank-Large-V2\n",
+      "- together/moonshotai/Kimi-K2-Instruct\n",
+      "- together/moonshotai/Kimi-K2-Instruct-0905\n",
+      "- together/openai/gpt-oss-120b\n",
+      "- together/openai/gpt-oss-20b\n",
+      "- together/openai/whisper-large-v3\n",
+      "- together/Qwen/Qwen2.5-72B-Instruct\n",
+      "- together/Qwen/Qwen2.5-72B-Instruct-Turbo\n",
+      "- together/Qwen/Qwen2.5-7B-Instruct-Turbo\n",
+      "- together/Qwen/Qwen2.5-Coder-32B-Instruct\n",
+      "- together/Qwen/Qwen2.5-VL-72B-Instruct\n",
+      "- together/Qwen/Qwen3-235B-A22B-fp8-tput\n",
+      "- together/Qwen/Qwen3-235B-A22B-Instruct-2507-tput\n",
+      "- together/Qwen/Qwen3-235B-A22B-Thinking-2507\n",
+      "- together/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8\n",
+      "- together/Qwen/Qwen3-Next-80B-A3B-Instruct\n",
+      "- together/Qwen/Qwen3-Next-80B-A3B-Thinking\n",
+      "- together/Qwen/QwQ-32B\n",
+      "- together/Salesforce/Llama-Rank-V1\n",
+      "- together/scb10x/scb10x-typhoon-2-1-gemma3-12b\n",
+      "- together/togethercomputer/m2-bert-80M-32k-retrieval\n",
+      "- together/togethercomputer/MoA-1\n",
+      "- together/togethercomputer/MoA-1-Turbo\n",
+      "- together/togethercomputer/Refuel-Llm-V2\n",
+      "- together/togethercomputer/Refuel-Llm-V2-Small\n",
+      "- together/Virtue-AI/VirtueGuard-Text-Lite\n",
+      "- together/zai-org/GLM-4.5-Air-FP8\n",
      "----\n"
     ]
    }
@ -373,10 +364,6 @@
    "for m in client.models.list():\n",
    "    print(f\"- {m.identifier}\")\n",
    "\n",
-    "print(\"----\")\n",
-    "print(\"Available shields (safety models):\")\n",
-    "for s in client.shields.list():\n",
-    "    print(s.identifier)\n",
    "print(\"----\")"
   ]
  },