From dc7df60d42cf3efdca35ef355c50691ed3ca54bb Mon Sep 17 00:00:00 2001
From: Francisco Arceo
Date: Fri, 4 Jul 2025 23:13:39 -0400
Subject: [PATCH 1/2] docs: Update starter docs to include milvus inline
 (#2631)

---
 .../self_hosted_distro/starter.md | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/source/distributions/self_hosted_distro/starter.md b/docs/source/distributions/self_hosted_distro/starter.md
index 1138318b3..753746d84 100644
--- a/docs/source/distributions/self_hosted_distro/starter.md
+++ b/docs/source/distributions/self_hosted_distro/starter.md
@@ -17,18 +17,18 @@ The `llamastack/distribution-starter` distribution is a comprehensive, multi-pro
 The starter distribution consists of the following provider configurations:
 
-| API | Provider(s) |
-|-----|-------------|
-| agents | `inline::meta-reference` |
-| datasetio | `remote::huggingface`, `inline::localfs` |
-| eval | `inline::meta-reference` |
-| files | `inline::localfs` |
+| API | Provider(s)                                                                                                                                                                                                                                                                                                                                    |
+|-----|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| agents | `inline::meta-reference`                                                                                                                                                                                                                                                                                                                       |
+| datasetio | `remote::huggingface`, `inline::localfs`                                                                                                                                                                                                                                                                                                       |
+| eval | `inline::meta-reference`                                                                                                                                                                                                                                                                                                                       |
+| files | `inline::localfs`                                                                                                                                                                                                                                                                                                                              |
 | inference | `remote::openai`, `remote::fireworks`, `remote::together`, `remote::ollama`, `remote::anthropic`, `remote::gemini`, `remote::groq`, `remote::sambanova`, `remote::vllm`, `remote::tgi`, `remote::cerebras`, `remote::llama-openai-compat`, `remote::nvidia`, `remote::hf::serverless`, `remote::hf::endpoint`, `inline::sentence-transformers` |
-| safety | `inline::llama-guard` |
-| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
-| telemetry | `inline::meta-reference` |
-| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
-| vector_io | `inline::faiss`, `inline::sqlite-vec`, `remote::chromadb`, `remote::pgvector` |
+| safety | `inline::llama-guard`                                                                                                                                                                                                                                                                                                                          |
+| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust`                                                                                                                                                                                                                                                                                  |
+| telemetry | `inline::meta-reference`                                                                                                                                                                                                                                                                                                                       |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`                                                                                                                                                                                                                                       |
+| vector_io | `inline::faiss`, `inline::sqlite-vec`, `inline::milvus`, `remote::chromadb`, `remote::pgvector`                                                                                                                                                                                                                                                |
 
 ## Inference Providers

From c025cab3a34034c26bc67f85e7879ecfaca62ded Mon Sep 17 00:00:00 2001
From: Wen Zhou
Date: Sat, 5 Jul 2025 05:14:57 +0200
Subject: [PATCH 2/2] docs: update docs to use "starter" than "ollama" (#2629)

---
 docs/getting_started_llama4.ipynb |  2 +-
 docs/quick_start.ipynb            | 21 +++++++++++++--------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb
index edefda28c..82aef6039 100644
--- a/docs/getting_started_llama4.ipynb
+++ b/docs/getting_started_llama4.ipynb
@@ -55,7 +55,7 @@
  "\n",
  "MODEL=\"Llama-4-Scout-17B-16E-Instruct\"\n",
  "# get meta url from llama.com\n",
- "!uv run --with llama-stackllama model download --source meta --model-id $MODEL --meta-url \n",
+ "!uv run --with llama-stack llama model download --source meta --model-id $MODEL --meta-url \n",
  "\n",
  "model_id = f\"meta-llama/{MODEL}\""
  ]

diff --git a/docs/quick_start.ipynb b/docs/quick_start.ipynb
index 4ae1dbe8d..91cfb569c 100644
--- a/docs/quick_start.ipynb
+++ b/docs/quick_start.ipynb
@@ -145,12 +145,12 @@
  "    del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
  "\n",
  "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
- "!uv run --with llama-stack llama stack build --template ollama --image-type venv --image-name myvenv\n",
+ "!uv run --with llama-stack llama stack build --template starter --image-type venv\n",
  "\n",
  "def run_llama_stack_server_background():\n",
  "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
  "    process = subprocess.Popen(\n",
- "        f\"uv run --with llama-stack llama stack run ollama --image-type venv --image-name myvenv --env INFERENCE_MODEL=llama3.2:3b\",\n",
+ "        f\"uv run --with llama-stack llama stack run starter --image-type venv --env INFERENCE_MODEL=llama3.2:3b\",\n",
  "        shell=True,\n",
  "        stdout=log_file,\n",
  "        stderr=log_file,\n",
@@ -249,18 +249,23 @@
  ],
  "source": [
  "from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
+ "import os\n",
+ "\n",
+ "os.environ[\"ENABLE_OLLAMA\"] = \"ollama\"\n",
+ "os.environ[\"OLLAMA_INFERENCE_MODEL\"] = \"llama3.2:3b\"\n",
+ "os.environ[\"OLLAMA_EMBEDDING_MODEL\"] = \"all-minilm:l6-v2\"\n",
+ "os.environ[\"OLLAMA_EMBEDDING_DIMENSION\"] = \"384\"\n",
  "\n",
  "vector_db_id = \"my_demo_vector_db\"\n",
  "client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
  "\n",
  "models = client.models.list()\n",
  "\n",
- "# Select the first LLM and first embedding models\n",
- "model_id = next(m for m in models if m.model_type == \"llm\").identifier\n",
- "embedding_model_id = (\n",
- "    em := next(m for m in models if m.model_type == \"embedding\")\n",
- ").identifier\n",
- "embedding_dimension = em.metadata[\"embedding_dimension\"]\n",
+ "# Select the first ollama and first ollama's embedding model\n",
+ "model_id = next(m for m in models if m.model_type == \"llm\" and m.provider_id == \"ollama\").identifier\n",
+ "embedding_model = next(m for m in models if m.model_type == \"embedding\" and m.provider_id == \"ollama\")\n",
+ "embedding_model_id = embedding_model.identifier\n",
+ "embedding_dimension = embedding_model.metadata[\"embedding_dimension\"]\n",
  "\n",
  "_ = client.vector_dbs.register(\n",
  "    vector_db_id=vector_db_id,\n",