From 5e31e32cdf30bcee9a6f2e4cdb3bf95b59e9165d Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Mon, 29 Sep 2025 12:27:51 -0700 Subject: [PATCH] update notebook --- .../notebooks/crewai/Llama_Stack_CrewAI.ipynb | 265 +++++++++--------- 1 file changed, 126 insertions(+), 139 deletions(-) diff --git a/docs/notebooks/crewai/Llama_Stack_CrewAI.ipynb b/docs/notebooks/crewai/Llama_Stack_CrewAI.ipynb index 98d554372..89b49ccb3 100644 --- a/docs/notebooks/crewai/Llama_Stack_CrewAI.ipynb +++ b/docs/notebooks/crewai/Llama_Stack_CrewAI.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "5b6a6a17-b931-4bea-8273-0d6e5563637a", "metadata": { "colab": { @@ -59,7 +59,7 @@ ] }, { - "name": "stdout", + "name": "stdin", "output_type": "stream", "text": [ "TOGETHER_API_KEY environment variable is not set. Please enter your API key: ········\n" @@ -109,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "dd2dacf3-ec8b-4cc7-8ff4-b5b6ea4a6e9e", "metadata": { "colab": { @@ -120,68 +120,30 @@ "outputId": "aa53f96a-6826-4bfb-d1aa-2c0ec2dd4893", "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Environment '/Users/kaiwu/work/kaiwu/llama-stack/.venv' already exists, re-using it.\n", - "Virtual environment /Users/kaiwu/work/kaiwu/llama-stack/.venv is already active\n", - "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", - "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 186ms\u001b[0m\u001b[0m\n", - "Installing pip dependencies\n", - "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", - "\u001b[2K\u001b[2mResolved \u001b[1m185 packages\u001b[0m \u001b[2min 1.29s\u001b[0m\u001b[0m \u001b[0m\n", - "\u001b[2mUninstalled \u001b[1m3 packages\u001b[0m \u001b[2min 285ms\u001b[0m\u001b[0m\n", - "\u001b[2K\u001b[2mInstalled \u001b[1m3 packages\u001b[0m \u001b[2min 105ms\u001b[0m\u001b[0m \u001b[0m\n", - " \u001b[31m-\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.5\u001b[0m\n", - " \u001b[32m+\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.4\u001b[0m\n", - " \u001b[31m-\u001b[39m \u001b[1mruamel-yaml\u001b[0m\u001b[2m==0.18.14\u001b[0m\n", - " \u001b[32m+\u001b[39m \u001b[1mruamel-yaml\u001b[0m\u001b[2m==0.17.40\u001b[0m\n", - " \u001b[31m-\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.12.5\u001b[0m\n", - " \u001b[32m+\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.9.10\u001b[0m\n", - "Installing special provider module: torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n", - "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", - "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 66ms\u001b[0m\u001b[0m\n", - "Installing special provider module: sentence-transformers --no-deps\n", - "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", - "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 63ms\u001b[0m\u001b[0m\n", - "Installing special provider module: torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n", - "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", - "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 95ms\u001b[0m\u001b[0m\n", - "\u001b[32mBuild Successful!\u001b[0m\n", - "\u001b[34mYou can find the newly-built distribution here: /Users/kaiwu/.llama/distributions/starter/starter-run.yaml\u001b[0m\n", - "\u001b[32mYou can run the new Llama Stack distro via: \u001b[34mllama stack run /Users/kaiwu/.llama/distributions/starter/starter-run.yaml --image-type venv\u001b[0m\u001b[0m\n", - "Starting Llama Stack server with PID: 26783\n", - "Waiting for server to start..........\n", - "Server is ready!\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import subprocess\n", "import time\n", "\n", - "\n", + "# Remove UV_SYSTEM_PYTHON to ensure uv creates a proper virtual environment\n", + "# instead of trying to use system Python globally, which could cause permission issues\n", + "# and package conflicts with the system's Python installation\n", "if \"UV_SYSTEM_PYTHON\" in os.environ:\n", " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", "\n", - "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n", - "!uv run --with llama-stack llama stack build --distro starter --image-type venv\n", - "\n", - "\n", "def run_llama_stack_server_background():\n", + " \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " \"uv run --with llama-stack llama stack run starter --image-type venv\",\n", + " \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", " text=True,\n", " )\n", "\n", - " print(f\"Starting Llama Stack server with PID: {process.pid}\")\n", + " print(f\"Building and starting Llama Stack server with PID: {process.pid}\")\n", " return process\n", "\n", "\n", @@ -208,10 +170,28 @@ " return False\n", "\n", "\n", - "# use this helper if needed to kill the server\n", "def kill_llama_stack_server():\n", - " # Kill any existing llama stack server processes\n", - " os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n", + " # Kill any existing llama stack server processes using pkill command\n", + " os.system(\"pkill -f llama_stack.core.server.server\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7f1494b7-938c-4338-9ae0-c463d2bc2eea", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Building and starting Llama Stack server with PID: 52433\n", + "Waiting for server to start........\n", + "Server is ready!\n" + ] + } + ], + "source": [ "server_process = run_llama_stack_server_background()\n", "assert wait_for_server_to_start()" ] @@ -253,9 +233,9 @@ "id": "vwhexjy1e8o" }, "source": [ - "#### Explore Available Models and Safety Features\n", + "#### Explore Available Models \n", "\n", - "Check what models and safety shields are available through your LlamaStack instance." + "Check what models are available through your LlamaStack instance." ] }, { @@ -268,102 +248,113 @@ "scrolled": true }, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Available models:\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n", - "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/shields \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "- llama_api/Llama-3.3-70B-Instruct\n", - "- llama_api/Llama-4-Maverick-17B-128E-Instruct-FP8\n", - "- llama_api/Llama-4-Scout-17B-16E-Instruct-FP8\n", - "- meta-llama/Llama-3.3-70B-Instruct\n", - "- meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n", - "- meta-llama/Llama-4-Scout-17B-16E-Instruct-FP8\n", - "- fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\n", - "- fireworks/accounts/fireworks/models/llama-v3p1-70b-instruct\n", - "- fireworks/accounts/fireworks/models/llama-v3p1-405b-instruct\n", - "- fireworks/accounts/fireworks/models/llama-v3p2-3b-instruct\n", - "- fireworks/accounts/fireworks/models/llama-v3p2-11b-vision-instruct\n", - "- fireworks/accounts/fireworks/models/llama-v3p2-90b-vision-instruct\n", - "- fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct\n", - "- fireworks/accounts/fireworks/models/llama4-scout-instruct-basic\n", - "- fireworks/accounts/fireworks/models/llama4-maverick-instruct-basic\n", - "- fireworks/nomic-ai/nomic-embed-text-v1.5\n", - "- fireworks/accounts/fireworks/models/llama-guard-3-8b\n", - "- fireworks/accounts/fireworks/models/llama-guard-3-11b-vision\n", - "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n", - "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n", - "- together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n", - "- together/meta-llama/Llama-3.2-3B-Instruct-Turbo\n", - "- together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n", - "- together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n", - "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo\n", - "- together/togethercomputer/m2-bert-80M-8k-retrieval\n", - "- together/togethercomputer/m2-bert-80M-32k-retrieval\n", - "- together/meta-llama/Llama-4-Scout-17B-16E-Instruct\n", - "- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n", - "- together/meta-llama/Llama-Guard-3-8B\n", - "- together/meta-llama/Llama-Guard-3-11B-Vision-Turbo\n", + "Available models:\n", "- bedrock/meta.llama3-1-8b-instruct-v1:0\n", "- bedrock/meta.llama3-1-70b-instruct-v1:0\n", "- bedrock/meta.llama3-1-405b-instruct-v1:0\n", - "- openai/gpt-3.5-turbo-0125\n", - "- openai/gpt-3.5-turbo\n", - "- openai/gpt-3.5-turbo-instruct\n", - "- openai/gpt-4\n", - "- openai/gpt-4-turbo\n", - "- openai/gpt-4o\n", - "- openai/gpt-4o-2024-08-06\n", - "- openai/gpt-4o-mini\n", - "- openai/gpt-4o-audio-preview\n", - "- openai/chatgpt-4o-latest\n", - "- openai/o1\n", - "- openai/o1-mini\n", - "- openai/o3-mini\n", - "- openai/o4-mini\n", - "- openai/text-embedding-3-small\n", - "- openai/text-embedding-3-large\n", - "- anthropic/claude-3-5-sonnet-latest\n", - "- anthropic/claude-3-7-sonnet-latest\n", - "- anthropic/claude-3-5-haiku-latest\n", - "- anthropic/voyage-3\n", - "- anthropic/voyage-3-lite\n", - "- anthropic/voyage-code-3\n", - "- gemini/gemini-1.5-flash\n", - "- gemini/gemini-1.5-pro\n", - "- gemini/gemini-2.0-flash\n", - "- gemini/gemini-2.0-flash-lite\n", - "- gemini/gemini-2.5-flash\n", - "- gemini/gemini-2.5-flash-lite\n", - "- gemini/gemini-2.5-pro\n", - "- gemini/text-embedding-004\n", - "- groq/llama3-8b-8192\n", - "- groq/llama-3.1-8b-instant\n", - "- groq/llama3-70b-8192\n", - "- groq/llama-3.3-70b-versatile\n", - "- groq/llama-3.2-3b-preview\n", - "- groq/meta-llama/llama-4-scout-17b-16e-instruct\n", - "- groq/meta-llama/llama-4-maverick-17b-128e-instruct\n", - "- sambanova/Meta-Llama-3.1-8B-Instruct\n", - "- sambanova/Meta-Llama-3.3-70B-Instruct\n", - "- sambanova/Llama-4-Maverick-17B-128E-Instruct\n", "- sentence-transformers/all-MiniLM-L6-v2\n", - "----\n", - "Available shields (safety models):\n", + "- together/Alibaba-NLP/gte-modernbert-base\n", + "- together/arcee-ai/AFM-4.5B\n", + "- together/arcee-ai/coder-large\n", + "- together/arcee-ai/maestro-reasoning\n", + "- together/arcee-ai/virtuoso-large\n", + "- together/arcee_ai/arcee-spotlight\n", + "- together/arize-ai/qwen-2-1.5b-instruct\n", + "- together/BAAI/bge-base-en-v1.5\n", + "- together/BAAI/bge-large-en-v1.5\n", + "- together/black-forest-labs/FLUX.1-dev\n", + "- together/black-forest-labs/FLUX.1-dev-lora\n", + "- together/black-forest-labs/FLUX.1-kontext-dev\n", + "- together/black-forest-labs/FLUX.1-kontext-max\n", + "- together/black-forest-labs/FLUX.1-kontext-pro\n", + "- together/black-forest-labs/FLUX.1-krea-dev\n", + "- together/black-forest-labs/FLUX.1-pro\n", + "- together/black-forest-labs/FLUX.1-schnell\n", + "- together/black-forest-labs/FLUX.1-schnell-Free\n", + "- together/black-forest-labs/FLUX.1.1-pro\n", + "- together/cartesia/sonic\n", + "- together/cartesia/sonic-2\n", + "- together/deepcogito/cogito-v2-preview-deepseek-671b\n", + "- together/deepcogito/cogito-v2-preview-llama-109B-MoE\n", + "- together/deepcogito/cogito-v2-preview-llama-405B\n", + "- together/deepcogito/cogito-v2-preview-llama-70B\n", + "- together/deepseek-ai/DeepSeek-R1\n", + "- together/deepseek-ai/DeepSeek-R1-0528-tput\n", + "- together/deepseek-ai/DeepSeek-R1-Distill-Llama-70B\n", + "- together/deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free\n", + "- together/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B\n", + "- together/deepseek-ai/DeepSeek-V3\n", + "- together/deepseek-ai/DeepSeek-V3.1\n", + "- together/google/gemma-3n-E4B-it\n", + "- together/intfloat/multilingual-e5-large-instruct\n", + "- together/lgai/exaone-3-5-32b-instruct\n", + "- together/lgai/exaone-deep-32b\n", + "- together/marin-community/marin-8b-instruct\n", + "- together/meta-llama/Llama-2-70b-hf\n", + "- together/meta-llama/Llama-3-70b-chat-hf\n", + "- together/meta-llama/Llama-3-70b-hf\n", + "- together/meta-llama/Llama-3.1-405B-Instruct\n", + "- together/meta-llama/Llama-3.2-1B-Instruct\n", + "- together/meta-llama/Llama-3.2-3B-Instruct-Turbo\n", + "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo\n", + "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free\n", + "- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n", + "- together/meta-llama/Llama-4-Scout-17B-16E-Instruct\n", + "- together/meta-llama/Llama-Guard-3-11B-Vision-Turbo\n", + "- together/meta-llama/Llama-Guard-4-12B\n", + "- together/meta-llama/LlamaGuard-2-8b\n", + "- together/meta-llama/Meta-Llama-3-70B-Instruct-Turbo\n", + "- together/meta-llama/Meta-Llama-3-8B-Instruct\n", + "- together/meta-llama/Meta-Llama-3-8B-Instruct-Lite\n", + "- together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n", + "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Reference\n", + "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n", + "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Reference\n", + "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n", + "- together/meta-llama/Meta-Llama-Guard-3-8B\n", + "- together/mistralai/Mistral-7B-Instruct-v0.1\n", + "- together/mistralai/Mistral-7B-Instruct-v0.2\n", + "- together/mistralai/Mistral-7B-Instruct-v0.3\n", + "- together/mistralai/Mistral-Small-24B-Instruct-2501\n", + "- together/mistralai/Mixtral-8x7B-Instruct-v0.1\n", + "- together/mixedbread-ai/Mxbai-Rerank-Large-V2\n", + "- together/moonshotai/Kimi-K2-Instruct\n", + "- together/moonshotai/Kimi-K2-Instruct-0905\n", + "- together/openai/gpt-oss-120b\n", + "- together/openai/gpt-oss-20b\n", + "- together/openai/whisper-large-v3\n", + "- together/Qwen/Qwen2.5-72B-Instruct\n", + "- together/Qwen/Qwen2.5-72B-Instruct-Turbo\n", + "- together/Qwen/Qwen2.5-7B-Instruct-Turbo\n", + "- together/Qwen/Qwen2.5-Coder-32B-Instruct\n", + "- together/Qwen/Qwen2.5-VL-72B-Instruct\n", + "- together/Qwen/Qwen3-235B-A22B-fp8-tput\n", + "- together/Qwen/Qwen3-235B-A22B-Instruct-2507-tput\n", + "- together/Qwen/Qwen3-235B-A22B-Thinking-2507\n", + "- together/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8\n", + "- together/Qwen/Qwen3-Next-80B-A3B-Instruct\n", + "- together/Qwen/Qwen3-Next-80B-A3B-Thinking\n", + "- together/Qwen/QwQ-32B\n", + "- together/Salesforce/Llama-Rank-V1\n", + "- together/scb10x/scb10x-typhoon-2-1-gemma3-12b\n", + "- together/togethercomputer/m2-bert-80M-32k-retrieval\n", + "- together/togethercomputer/MoA-1\n", + "- together/togethercomputer/MoA-1-Turbo\n", + "- together/togethercomputer/Refuel-Llm-V2\n", + "- together/togethercomputer/Refuel-Llm-V2-Small\n", + "- together/Virtue-AI/VirtueGuard-Text-Lite\n", + "- together/zai-org/GLM-4.5-Air-FP8\n", "----\n" ] } @@ -373,10 +364,6 @@ "for m in client.models.list():\n", " print(f\"- {m.identifier}\")\n", "\n", - "print(\"----\")\n", - "print(\"Available shields (safety models):\")\n", - "for s in client.shields.list():\n", - " print(s.identifier)\n", "print(\"----\")" ] },