From 8186c880218034e669dbd1c1dc111d8814776119 Mon Sep 17 00:00:00 2001
From: Yuan Tang <terrytangyuan@gmail.com>
Date: Sun, 9 Feb 2025 22:26:36 -0500
Subject: [PATCH 1/7] docs: Render check marks correctly on PyPI (#1024)

# What does this PR do?

The table on the project's PyPI page does not render check marks. This
PR switches to using the Unicode check mark symbol directly, which
renders correctly on PyPI.

Before:

![image](https://github.com/user-attachments/assets/6d01d440-8722-4c37-8b0a-9ba8c0cdb48d)

After:

![image](https://github.com/user-attachments/assets/3a7153f2-9468-40f6-97a2-17f903de4287)

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
---
 README.md | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index cdf98dc12..a5e5b217d 100644
--- a/README.md
+++ b/README.md
@@ -34,22 +34,22 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on
 ### API Providers
 Here is a list of the various API providers and available distributions to developers started easily,
 
-|                                  **API Provider Builder**                                  |    **Environments**    |     **Agents**     |   **Inference**    |     **Memory**     |     **Safety**     |   **Telemetry**    |
-|:------------------------------------------------------------------------------------------:|:----------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:|
-|                                       Meta Reference                                       |      Single Node       | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
-|                                          SambaNova                                         |         Hosted         |                    | :heavy_check_mark: |                    |                    |                    |
-|                                          Cerebras                                          |         Hosted         |                    | :heavy_check_mark: |                    |                    |                    |
-|                                         Fireworks                                          |         Hosted         | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |                    |                    |
-|                                        AWS Bedrock                                         |         Hosted         |                    | :heavy_check_mark: |                    | :heavy_check_mark: |                    |
-|                                          Together                                          |         Hosted         | :heavy_check_mark: | :heavy_check_mark: |                    | :heavy_check_mark: |                    |
-|                                            Groq                                            |         Hosted         |                    | :heavy_check_mark: |                    |                    |                    |
-|                                           Ollama                                           |      Single Node       |                    | :heavy_check_mark: |                    |                    |                    |
-|                                            TGI                                             | Hosted and Single Node |                    | :heavy_check_mark: |                    |                    |                    |
-| NVIDIA NIM | Hosted and Single Node |                    | :heavy_check_mark: |                    |                    |                    |
-|                                           Chroma                                           |      Single Node       |                    |                    | :heavy_check_mark: |                    |                    |
-|                                         PG Vector                                          |      Single Node       |                    |                    | :heavy_check_mark: |                    |                    |
-|                                     PyTorch ExecuTorch                                     |     On-device iOS      | :heavy_check_mark: | :heavy_check_mark: |                    |                    |                    |
-|                        vLLM                        | Hosted and Single Node |                    | :heavy_check_mark: |                    |                    |                    |
+| **API Provider Builder** |    **Environments**    | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
+|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|
+|      Meta Reference      |      Single Node       |     ✅      |       ✅       |     ✅      |     ✅      |       ✅       |
+|        SambaNova         |         Hosted         |            |       ✅       |            |            |               |
+|         Cerebras         |         Hosted         |            |       ✅       |            |            |               |
+|        Fireworks         |         Hosted         |     ✅      |       ✅       |     ✅      |            |               |
+|       AWS Bedrock        |         Hosted         |            |       ✅       |            |     ✅      |               |
+|         Together         |         Hosted         |     ✅      |       ✅       |            |     ✅      |               |
+|           Groq           |         Hosted         |            |       ✅       |            |            |               |
+|          Ollama          |      Single Node       |            |       ✅       |            |            |               |
+|           TGI            | Hosted and Single Node |            |       ✅       |            |            |               |
+|        NVIDIA NIM        | Hosted and Single Node |            |       ✅       |            |            |               |
+|          Chroma          |      Single Node       |            |               |     ✅      |            |               |
+|        PG Vector         |      Single Node       |            |               |     ✅      |            |               |
+|    PyTorch ExecuTorch    |     On-device iOS      |     ✅      |       ✅       |            |            |               |
+|           vLLM           | Hosted and Single Node |            |       ✅       |            |            |               |
 
 ### Distributions
 

From 076213165c1005a5348eb96aed0bab3e9d3935f4 Mon Sep 17 00:00:00 2001
From: Michael Clifford <mcliffor@redhat.com>
Date: Mon, 10 Feb 2025 09:25:30 -0500
Subject: [PATCH 2/7] docs: update rag.md example code to prevent errors
 (#1009)

---
 docs/source/building_applications/rag.md | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/docs/source/building_applications/rag.md b/docs/source/building_applications/rag.md
index 6b7a354b7..5287a2367 100644
--- a/docs/source/building_applications/rag.md
+++ b/docs/source/building_applications/rag.md
@@ -36,13 +36,12 @@ chunks = [
         "content": "Your document text here",
         "mime_type": "text/plain",
     },
-    ...,
 ]
-client.vector_io.insert(vector_db_id, chunks)
+client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
 
 # You can then query for these chunks
 chunks_response = client.vector_io.query(
-    vector_db_id, query="What do you know about..."
+    vector_db_id=vector_db_id, query="What do you know about..."
 )
 ```
 
@@ -72,8 +71,8 @@ client.tool_runtime.rag_tool.insert(
 
 # Query documents
 results = client.tool_runtime.rag_tool.query(
-    vector_db_id=vector_db_id,
-    query="What do you know about...",
+    vector_db_ids=[vector_db_id],
+    content="What do you know about...",
 )
 ```
 
@@ -82,10 +81,14 @@ results = client.tool_runtime.rag_tool.query(
 One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:
 
 ```python
+from llama_stack_client.types.agent_create_params import AgentConfig
+from llama_stack_client.lib.agents.agent import Agent
+
 # Configure agent with memory
 agent_config = AgentConfig(
-    model="Llama3.2-3B-Instruct",
+    model="meta-llama/Llama-3.2-3B-Instruct",
     instructions="You are a helpful assistant",
+    enable_session_persistence=False,
     toolgroups=[
         {
             "name": "builtin::rag",
@@ -105,10 +108,10 @@ response = agent.create_turn(
         {"role": "user", "content": "I am providing some documents for reference."}
     ],
     documents=[
-        dict(
-            content="https://raw.githubusercontent.com/example/doc.rst",
-            mime_type="text/plain",
-        )
+        {
+            "content": "https://raw.githubusercontent.com/example/doc.rst",
+            "mime_type": "text/plain",
+        }
     ],
     session_id=session_id,
 )

From 371f11a569e7ad314d208681ff20a405fb514840 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?= <seb@redhat.com>
Date: Mon, 10 Feb 2025 17:42:30 +0100
Subject: [PATCH 3/7] build: update uv lock to sync package versions (#1026)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

Updated `uv.lock` to reflect the latest versions of `llama-models`,
`llama-stack`, and `llama-stack-client` (bumped to 0.1.2). This ensures
dependency consistency and avoids potential issues with outdated package
references.

Added `uv-sync` hook from `uv-pre-commit` repository to ensure
synchronization of dependencies.

Signed-off-by: Sébastien Han <seb@redhat.com>

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
[Describe the tests you ran to verify your changes with result
summaries. *Provide clear instructions so the plan can be easily
re-executed.*]

[//]: # (## Documentation)
[//]: # (- [ ] Added a Changelog entry if the change is significant)

Signed-off-by: Sébastien Han <seb@redhat.com>
---
 .github/workflows/pre-commit.yml |  4 ++++
 .pre-commit-config.yaml          |  1 +
 uv.lock                          | 18 +++++++++---------
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index faa2eda31..046387ab9 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -23,3 +23,7 @@ jobs:
             .pre-commit-config.yaml
 
       - uses: pre-commit/action@v3.0.1
+
+      - name: Verify if there are any diff files after pre-commit
+        run: |
+          git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index adafccf64..bca91081f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -48,6 +48,7 @@ repos:
     hooks:
     -   id: uv-export
         args: ["--frozen", "--no-hashes", "--no-emit-project"]
+    -   id: uv-sync
 
 # -   repo: https://github.com/pre-commit/mirrors-mypy
 #     rev: v1.14.0
diff --git a/uv.lock b/uv.lock
index f492872bc..087396eea 100644
--- a/uv.lock
+++ b/uv.lock
@@ -687,7 +687,7 @@ wheels = [
 
 [[package]]
 name = "llama-models"
-version = "0.1.1"
+version = "0.1.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "jinja2" },
@@ -696,14 +696,14 @@ dependencies = [
     { name = "pyyaml" },
     { name = "tiktoken" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/df/80/4a4595cf5e55f71c0e15b85ff2f4c04b0742bf664ede062a09c9d383bf7b/llama_models-0.1.1.tar.gz", hash = "sha256:7cb5a9fe38485b47aff4c93e183d6d390a676a7619f3355502576b652f17733a", size = 1608412 }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/f2/ed8310d4677cd38ab45ffba45aea2a4e9882b640045ad9c3198ac69e5a85/llama_models-0.1.2.tar.gz", hash = "sha256:1266eaec7a8db336e4ed034d2b494189ccb7fd6d6b7aefe874eee749a4340b9b", size = 1608069 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d9/93/d49dd0f0cd37df1a7a7fb25444d010f626cdf42b21eea11d839b0f6a808a/llama_models-0.1.1-py3-none-any.whl", hash = "sha256:7e4f15dc4f6f011852ea2c42f9770b75140f5eca670b32cc67fc0a4605c55f89", size = 1650981 },
+    { url = "https://files.pythonhosted.org/packages/55/a7/34b9e88ef4109759c8881f43b8006139e3d13d54c440b8c571b253655f54/llama_models-0.1.2-py3-none-any.whl", hash = "sha256:8aa5287d1c6325698991ff677e71148cac347e07493bb5b3ab891e614b89e1f8", size = 1651273 },
 ]
 
 [[package]]
 name = "llama-stack"
-version = "0.1.1"
+version = "0.1.2"
 source = { editable = "." }
 dependencies = [
     { name = "blobfile" },
@@ -751,8 +751,8 @@ requires-dist = [
     { name = "fire" },
     { name = "httpx" },
     { name = "huggingface-hub" },
-    { name = "llama-models", specifier = ">=0.1.1" },
-    { name = "llama-stack-client", specifier = ">=0.1.1" },
+    { name = "llama-models", specifier = ">=0.1.2" },
+    { name = "llama-stack-client", specifier = ">=0.1.2" },
     { name = "myst-parser", marker = "extra == 'docs'" },
     { name = "nbval", marker = "extra == 'dev'" },
     { name = "pre-commit", marker = "extra == 'dev'" },
@@ -780,7 +780,7 @@ requires-dist = [
 
 [[package]]
 name = "llama-stack-client"
-version = "0.1.1"
+version = "0.1.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -797,9 +797,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/07/42/7004958ac1a6da9a8060decf0d9120fdeb3b2775de090a0a473f2ee4a27d/llama_stack_client-0.1.1.tar.gz", hash = "sha256:3e549a848ade959d342fa52ec49b1913b7bb615a77b5b8dcaefe6ff94409049e", size = 179729 }
+sdist = { url = "https://files.pythonhosted.org/packages/9e/75/8b41a3026c871a8650cd8d2cfda9f891a9163458813574f36518bb40afe4/llama_stack_client-0.1.2.tar.gz", hash = "sha256:94277ddae52be557d771dcdc15d85af9012b5aa87439dd69ec1dc0ff486b0c8e", size = 188023 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/80/66/5255c09dc001ff437fd6fe6fad27142035b60073df243f7df0494095f605/llama_stack_client-0.1.1-py3-none-any.whl", hash = "sha256:e07d58fdcc1eaa370dd00b94c2dd1a8169c0ac60c37f6f2772cbc2c5b63f2e62", size = 348665 },
+    { url = "https://files.pythonhosted.org/packages/c4/32/3a3a97eecff1f1e3a1dc90e9b00681abea11ec4f43a7ca549981261e18b6/llama_stack_client-0.1.2-py3-none-any.whl", hash = "sha256:85ff0fb57a62d7d0470cfaa2b07a595c9fb3483297944d5e5a066db850d38ccd", size = 359415 },
 ]
 
 [[package]]

From ab9516c7899d2411f1096d6b63e6366c228d9460 Mon Sep 17 00:00:00 2001
From: Ellis Tarn <ellistarn@gmail.com>
Date: Mon, 10 Feb 2025 13:24:15 -0800
Subject: [PATCH 4/7] fix: Gaps in doc codegen (#1035)

# What does this PR do?
Catches docs up to source with:
```
python llama_stack/scripts/distro_codegen.py
```

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
Manually checked by running:
```
sphinx-autobuild docs/source build/html
```

[//]: # (## Documentation)
[//]: # (- [ ] Added a Changelog entry if the change is significant)
---
 distributions/dependencies.json               | 192 +++++++++---------
 .../self_hosted_distro/ollama.md              |   4 +-
 llama_stack/scripts/distro_codegen.py         |   2 +-
 llama_stack/templates/bedrock/run.yaml        |   2 +
 llama_stack/templates/cerebras/run.yaml       |   2 +
 .../templates/dell/run-with-safety.yaml       |   2 +
 llama_stack/templates/dell/run.yaml           |   2 +
 .../templates/fireworks/run-with-safety.yaml  |   2 +
 llama_stack/templates/fireworks/run.yaml      |   2 +
 .../hf-endpoint/run-with-safety.yaml          |   2 +
 llama_stack/templates/hf-endpoint/run.yaml    |   2 +
 .../hf-serverless/run-with-safety.yaml        |   2 +
 llama_stack/templates/hf-serverless/run.yaml  |   2 +
 .../meta-reference-gpu/run-with-safety.yaml   |   2 +
 .../templates/meta-reference-gpu/run.yaml     |   2 +
 .../meta-reference-quantized-gpu/run.yaml     |   2 +
 llama_stack/templates/nvidia/run.yaml         |   2 +
 .../templates/ollama/run-with-safety.yaml     |   2 +
 llama_stack/templates/ollama/run.yaml         |   2 +
 .../remote-vllm/run-with-safety.yaml          |   2 +
 llama_stack/templates/remote-vllm/run.yaml    |   2 +
 llama_stack/templates/sambanova/run.yaml      |   2 +
 .../templates/tgi/run-with-safety.yaml        |   2 +
 llama_stack/templates/tgi/run.yaml            |   2 +
 .../templates/together/run-with-safety.yaml   |   2 +
 llama_stack/templates/together/run.yaml       |   2 +
 llama_stack/templates/vllm-gpu/run.yaml       |   2 +
 27 files changed, 146 insertions(+), 100 deletions(-)

diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index 6babf3440..c1450d97e 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -66,6 +66,40 @@
     "sentence-transformers --no-deps",
     "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ],
+  "dell": [
+    "aiohttp",
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "huggingface_hub",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "fireworks": [
     "aiosqlite",
     "autoevals",
@@ -252,6 +286,38 @@
     "sentence-transformers --no-deps",
     "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ],
+  "nvidia": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "ollama": [
     "aiohttp",
     "aiosqlite",
@@ -319,6 +385,36 @@
     "sentence-transformers --no-deps",
     "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ],
+  "sambanova": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "tgi": [
     "aiohttp",
     "aiosqlite",
@@ -421,101 +517,5 @@
     "vllm",
     "sentence-transformers --no-deps",
     "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "nvidia": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "sambanova": [
-    "aiosqlite",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "dell": [
-    "aiohttp",
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "huggingface_hub",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ]
 }
diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 54f6b8fdf..73a609421 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -26,9 +26,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 
 
-You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
-
-### Environment Variables
+You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.### Environment Variables
 
 The following environment variables can be configured:
 
diff --git a/llama_stack/scripts/distro_codegen.py b/llama_stack/scripts/distro_codegen.py
index 7064d3104..c73c15d41 100644
--- a/llama_stack/scripts/distro_codegen.py
+++ b/llama_stack/scripts/distro_codegen.py
@@ -29,7 +29,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     if not templates_dir.exists():
         raise FileNotFoundError(f"Templates directory not found: {templates_dir}")
 
-    return (d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
+    return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
 
 
 def process_template(template_dir: Path, progress) -> None:
diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml
index 39408c1bd..be6c9a928 100644
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@@ -115,3 +115,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml
index 5a70890a8..05d3f4525 100644
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@@ -117,3 +117,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml
index bdc82d03a..04c5957d4 100644
--- a/llama_stack/templates/dell/run-with-safety.yaml
+++ b/llama_stack/templates/dell/run-with-safety.yaml
@@ -116,3 +116,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml
index 2ba62a782..706444eb1 100644
--- a/llama_stack/templates/dell/run.yaml
+++ b/llama_stack/templates/dell/run.yaml
@@ -107,3 +107,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml
index a4b425436..0fbe14a5a 100644
--- a/llama_stack/templates/fireworks/run-with-safety.yaml
+++ b/llama_stack/templates/fireworks/run-with-safety.yaml
@@ -172,3 +172,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml
index a497317bd..ccf67dcbb 100644
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -161,3 +161,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
index 0329f580b..f520a2fda 100644
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
@@ -124,3 +124,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml
index 8163fe28e..708cb1bcc 100644
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@@ -114,3 +114,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml
index 9cee920a5..7f0abf5be 100644
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml
@@ -124,3 +124,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml
index c8ad0d38d..c0b7a4c60 100644
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@@ -114,3 +114,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
index 0faaabb15..c5286fc6b 100644
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@@ -126,3 +126,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml
index 6ffe1fa36..310585f23 100644
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -115,3 +115,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
index 5ff87a901..d43cf3917 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
@@ -117,3 +117,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index 6dc325e9d..c8ae362f5 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -147,3 +147,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 5b5c9c253..ac5dab755 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -121,3 +121,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 3cc1cb2ac..485223675 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -110,3 +110,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 4a0fa9a85..1fe998a1f 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -126,3 +126,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index 9631f94a2..9d3db8a31 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -115,3 +115,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml
index 6cec51824..39b0f3c4e 100644
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@@ -126,3 +126,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml
index 503505c32..ed6c9ef6f 100644
--- a/llama_stack/templates/tgi/run-with-safety.yaml
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@@ -114,3 +114,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml
index f1953c513..8bf76f37b 100644
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@@ -113,3 +113,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml
index ec351108e..298926630 100644
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@@ -167,3 +167,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index c2afd98e9..920003759 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -156,3 +156,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml
index 165e4d51d..41a545e1a 100644
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@@ -117,3 +117,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321

From afca9d92f93b6384d1344be0726ea1d4c0b2c5e9 Mon Sep 17 00:00:00 2001
From: Ellis Tarn <ellistarn@gmail.com>
Date: Mon, 10 Feb 2025 13:35:16 -0800
Subject: [PATCH 5/7] fix: Readthedocs cannot parse comments, resulting in docs
 bugs (#1033)

---
 docs/source/distributions/self_hosted_distro/dell.md  |  2 +-
 .../distributions/self_hosted_distro/fireworks.md     |  2 +-
 .../self_hosted_distro/meta-reference-gpu.md          |  2 +-
 .../meta-reference-quantized-gpu.md                   |  2 +-
 .../source/distributions/self_hosted_distro/ollama.md |  2 +-
 .../distributions/self_hosted_distro/remote-vllm.md   |  2 +-
 .../distributions/self_hosted_distro/sambanova.md     |  2 +-
 docs/source/distributions/self_hosted_distro/tgi.md   |  2 +-
 .../distributions/self_hosted_distro/together.md      |  2 +-
 llama_stack/templates/template.py                     | 11 +++++++++--
 10 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/docs/source/distributions/self_hosted_distro/dell.md b/docs/source/distributions/self_hosted_distro/dell.md
index be326ffa5..aef3ecf58 100644
--- a/docs/source/distributions/self_hosted_distro/dell.md
+++ b/docs/source/distributions/self_hosted_distro/dell.md
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 
 # Dell Distribution of Llama Stack
 
diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md
index 9afeb4894..f77d9f656 100644
--- a/docs/source/distributions/self_hosted_distro/fireworks.md
+++ b/docs/source/distributions/self_hosted_distro/fireworks.md
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Fireworks Distribution
 
 ```{toctree}
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index d00d8177f..b183757db 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Meta Reference Distribution
 
 ```{toctree}
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
index e46c2d112..9aeb7a88b 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Meta Reference Quantized Distribution
 
 ```{toctree}
diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 73a609421..c015b9610 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Ollama Distribution
 
 ```{toctree}
diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index ff626d40d..6c3bbd1d0 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Remote vLLM Distribution
 ```{toctree}
 :maxdepth: 2
diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md
index 86ef4ac58..e6ac616be 100644
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # SambaNova Distribution
 
 ```{toctree}
diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md
index b970ab9fe..f4eecf2cd 100644
--- a/docs/source/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/distributions/self_hosted_distro/tgi.md
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 
 # TGI Distribution
 
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 45ae462d5..8e36c1eb0 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Together Distribution
 
 ```{toctree}
diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py
index 09efd2038..04a09741c 100644
--- a/llama_stack/templates/template.py
+++ b/llama_stack/templates/template.py
@@ -131,8 +131,15 @@ class DistributionTemplate(BaseModel):
             providers_str = ", ".join(f"`{p}`" for p in providers)
             providers_table += f"| {api} | {providers_str} |\n"
 
-        template = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
-        template += self.template_path.read_text()
+        template = self.template_path.read_text()
+        comment = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
+        orphantext = "---\norphan: true\n---\n"
+
+        if template.startswith(orphantext):
+            template = template.replace(orphantext, orphantext + comment)
+        else:
+            template = comment + template
+
         # Render template with rich-generated table
         env = jinja2.Environment(
             trim_blocks=True,

From 36d35406a77df5bc4dad341a4ac64f2d8e3b8a5b Mon Sep 17 00:00:00 2001
From: Ellis Tarn <ellistarn@gmail.com>
Date: Mon, 10 Feb 2025 14:27:17 -0800
Subject: [PATCH 6/7] fix: a bad newline in ollama docs (#1036)

# What does this PR do?
Catches a bug in the previous codegen which was removing newlines.

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
```
python llama_stack/scripts/distro_codegen.py
```

[//]: # (## Documentation)
[//]: # (- [ ] Added a Changelog entry if the change is significant)
---
 docs/source/distributions/self_hosted_distro/ollama.md | 4 +++-
 llama_stack/templates/ollama/doc_template.md           | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index c015b9610..a3a45f9a8 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -26,7 +26,9 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 
 
-You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.### Environment Variables
+You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
+
+### Environment Variables
 
 The following environment variables can be configured:
 
diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md
index eb4aadd29..29efe39c3 100644
--- a/llama_stack/templates/ollama/doc_template.md
+++ b/llama_stack/templates/ollama/doc_template.md
@@ -16,7 +16,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following
 
 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
 
-{%- if run_config_env_vars %}
+{% if run_config_env_vars %}
 ### Environment Variables
 
 The following environment variables can be configured:

From 3856927ee8ac4c3d8c6b1cc36cbaf0be29cf5076 Mon Sep 17 00:00:00 2001
From: Bill Murdock <bmurdock@redhat.com>
Date: Mon, 10 Feb 2025 18:08:33 -0500
Subject: [PATCH 7/7] fix: Update Qdrant support post-refactor (#1022)

# What does this PR do?

I tried running the Qdrant provider and found some bugs. See #1021 for
details. @terrytangyuan wrote there:

> Please feel free to submit your changes in a PR. I fixed similar
issues for pgvector provider. This might be an issue introduced from a
refactoring.

So I am submitting this PR.

Closes #1021

## Test Plan

Here are the highlights for what I did to test this:

References:
- https://llama-stack.readthedocs.io/en/latest/getting_started/index.html
- https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/rag_with_vector_db.py
- https://github.com/meta-llama/llama-stack/blob/main/docs/zero_to_hero_guide/README.md#build-configure-and-run-llama-stack

Install and run Qdrant server:

```
podman pull qdrant/qdrant
mkdir qdrant-data
podman run -p 6333:6333 -v $(pwd)/qdrant-data:/qdrant/storage qdrant/qdrant
```

Install and run Llama Stack from the venv-support PR (mainly because I
didn't want to install conda):

```
brew install cmake # Should just need this once

git clone https://github.com/meta-llama/llama-models.git
gh repo clone cdoern/llama-stack
cd llama-stack
gh pr checkout 1018 # This is the checkout that introduces venv support for build/run.  Otherwise you have to use conda.  Eventually this will be part of main, hopefully.

uv sync --extra dev
uv pip install -e .
source .venv/bin/activate
uv pip install qdrant_client

LLAMA_STACK_DIR=$(pwd) LLAMA_MODELS_DIR=../llama-models llama stack build --template ollama --image-type venv
```
```
edit llama_stack/templates/ollama/run.yaml
```

in that editor under:
```
  vector_io:
```
add:
```
  - provider_id: qdrant
    provider_type: remote::qdrant
    config: {}
```

see
https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/vector_io/qdrant/config.py#L14
for config options (but I didn't need any)

```
LLAMA_STACK_DIR=$(pwd) LLAMA_MODELS_DIR=../llama-models llama stack run ollama --image-type venv \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
   --env SAFETY_MODEL=$SAFETY_MODEL \
   --env OLLAMA_URL=$OLLAMA_URL
```

Then I tested it out in a notebook.  Key highlights included:

```
qdrant_provider = None
for provider in client.providers.list():
    if provider.api == "vector_io" and provider.provider_id == "qdrant":
        qdrant_provider = provider
qdrant_provider
assert qdrant_provider is not None, "QDrant is not a provider.  You need to edit the run yaml file you use in your `llama stack run` call"

vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
client.vector_dbs.register(
    vector_db_id=vector_db_id,
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
    provider_id=qdrant_provider.provider_id,
)
```

Other than that, I just followed what was in
https://llama-stack.readthedocs.io/en/latest/getting_started/index.html

It would be good to have automated tests for this in the future, but
that would be a big undertaking.

Signed-off-by: Bill Murdock <bmurdock@redhat.com>
---
 llama_stack/providers/remote/vector_io/qdrant/__init__.py | 4 ++--
 llama_stack/providers/remote/vector_io/qdrant/qdrant.py   | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/llama_stack/providers/remote/vector_io/qdrant/__init__.py
index 54605fcf9..c584e29ef 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/__init__.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/__init__.py
@@ -12,8 +12,8 @@ from .config import QdrantConfig
 
 
 async def get_adapter_impl(config: QdrantConfig, deps: Dict[Api, ProviderSpec]):
-    from .qdrant import QdrantVectorMemoryAdapter
+    from .qdrant import QdrantVectorDBAdapter
 
-    impl = QdrantVectorMemoryAdapter(config, deps[Api.inference])
+    impl = QdrantVectorDBAdapter(config, deps[Api.inference])
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 719070528..e7ad136eb 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -55,7 +55,7 @@ class QdrantIndex(EmbeddingIndex):
 
         points = []
         for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
-            chunk_id = f"{chunk.document_id}:chunk-{i}"
+            chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}"
             points.append(
                 PointStruct(
                     id=convert_id(chunk_id),
@@ -93,6 +93,9 @@ class QdrantIndex(EmbeddingIndex):
 
         return QueryChunksResponse(chunks=chunks, scores=scores)
 
+    async def delete(self):
+        await self.client.delete_collection(collection_name=self.collection_name)
+
 
 class QdrantVectorDBAdapter(VectorIO, VectorDBsProtocolPrivate):
     def __init__(self, config: QdrantConfig, inference_api: Api.inference) -> None: