From fffdab4f5c23b4ac045b898bdb1e69edda3a3498 Mon Sep 17 00:00:00 2001
From: Chacksu <connorhack10@gmail.com>
Date: Wed, 13 Aug 2025 09:18:25 -0400
Subject: [PATCH 1/2] fix: Dell distribution missing kvstore (#3113)

# What does this PR do?

- Added kvstore config to ChromaDB provider config for Dell distribution
similar to [starter
config](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/starter/run.yaml#L110-L112)
- Fixed an
[error](https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/inference/_generated/_async_client.py#L3424-L3425)
raised when getting endpoint information, by passing `hf-inference` as the
provider to the `AsyncInferenceClient` (TGI client).

## Test Plan
```
export INFERENCE_PORT=8181
export DEH_URL=http://0.0.0.0:$INFERENCE_PORT
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export CHROMADB_HOST=localhost
export CHROMADB_PORT=8000
export CHROMA_URL=http://$CHROMADB_HOST:$CHROMADB_PORT
export CUDA_VISIBLE_DEVICES=0
export LLAMA_STACK_PORT=8321
export HF_TOKEN=[redacted]

# TGI Server
docker run --rm -it \
  --pull always \
  --network host \
  -v $HOME/.cache/huggingface:/data \
  -e HF_TOKEN=$HF_TOKEN \
  -e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
  -p $INFERENCE_PORT:$INFERENCE_PORT \
  --gpus all \
  ghcr.io/huggingface/text-generation-inference:latest \
  --dtype float16 \
  --usage-stats off \
  --sharded false \
  --cuda-memory-fraction 0.8 \
  --model-id meta-llama/Llama-3.2-3B-Instruct \
  --port $INFERENCE_PORT \
  --hostname 0.0.0.0

# ChromaDB
docker run --rm -it \
  --name chromadb \
  --net=host  -p 8000:8000 \
  -v ~/chroma:/chroma/chroma \
  -e IS_PERSISTENT=TRUE \
  -e ANONYMIZED_TELEMETRY=FALSE \
  chromadb/chroma:latest

# Llama Stack
llama stack run dell \
 --port $LLAMA_STACK_PORT \
 --env INFERENCE_MODEL=$INFERENCE_MODEL \
 --env DEH_URL=$DEH_URL \
 --env CHROMADB_URL=$CHROMA_URL
```

---------

Co-authored-by: Connor Hack <connorhack@fb.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
---
 llama_stack/distributions/dell/dell.py              | 8 +++++---
 llama_stack/distributions/dell/run-with-safety.yaml | 5 ++++-
 llama_stack/distributions/dell/run.yaml             | 5 ++++-
 llama_stack/providers/remote/inference/tgi/tgi.py   | 4 +---
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/llama_stack/distributions/dell/dell.py b/llama_stack/distributions/dell/dell.py
index b561ea00e..e3bf0ee03 100644
--- a/llama_stack/distributions/dell/dell.py
+++ b/llama_stack/distributions/dell/dell.py
@@ -16,6 +16,7 @@ from llama_stack.distributions.template import DistributionTemplate, RunConfigSe
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
+from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -71,9 +72,10 @@ def get_distribution_template() -> DistributionTemplate:
     chromadb_provider = Provider(
         provider_id="chromadb",
         provider_type="remote::chromadb",
-        config={
-            "url": "${env.CHROMA_URL}",
-        },
+        config=ChromaVectorIOConfig.sample_run_config(
+            f"~/.llama/distributions/{name}/",
+            url="${env.CHROMADB_URL:=}",
+        ),
     )
 
     inference_model = ModelInput(
diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml
index ecc6729eb..d89c92aa1 100644
--- a/llama_stack/distributions/dell/run-with-safety.yaml
+++ b/llama_stack/distributions/dell/run-with-safety.yaml
@@ -26,7 +26,10 @@ providers:
   - provider_id: chromadb
     provider_type: remote::chromadb
     config:
-      url: ${env.CHROMA_URL}
+      url: ${env.CHROMADB_URL:=}
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml
index fc2553526..7397410ba 100644
--- a/llama_stack/distributions/dell/run.yaml
+++ b/llama_stack/distributions/dell/run.yaml
@@ -22,7 +22,10 @@ providers:
   - provider_id: chromadb
     provider_type: remote::chromadb
     config:
-      url: ${env.CHROMA_URL}
+      url: ${env.CHROMADB_URL:=}
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index a5bb079ef..323831845 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -308,9 +308,7 @@ class TGIAdapter(_HfAdapter):
         if not config.url:
             raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
         log.info(f"Initializing TGI client with url={config.url}")
-        self.client = AsyncInferenceClient(
-            model=config.url,
-        )
+        self.client = AsyncInferenceClient(model=config.url, provider="hf-inference")
         endpoint_info = await self.client.get_endpoint_info()
         self.max_tokens = endpoint_info["max_total_tokens"]
         self.model_id = endpoint_info["model_id"]

From 5bd6cb52fb9dd5a1a4defaeef0fe881cce59efdd Mon Sep 17 00:00:00 2001
From: Krzysztof Malczuk <2000krzysztof@gmail.com>
Date: Wed, 13 Aug 2025 15:14:03 +0100
Subject: [PATCH 2/2] fix: github action canceling valid tasks for checking
 semantic pr title (#3127)

# What does this PR do?
<!-- Provide a short summary of what this PR does and why. Link to
relevant issues if applicable. -->
This PR changes the concurrency group name from github.ref to
github.event.pull_request.number. The reason for this is that github.ref
does not act as a unique identifier in the pull_request_target event and
is only unique in pull_request. The github action was getting canceled
because the group name was not unique in the concurrency section.

<!-- If resolving an issue, uncomment and update the line below -->
Closes #3102

## Test Plan
<!-- Describe the tests you ran to verify your changes with result
summaries. *Provide clear instructions so the plan can be easily
re-executed.* -->
To test this I created a fake github action and ran it through act
to see what the github.ref variable produced and what alternatives can
be used. This confirmed that github.ref was not unique and that
github.event.pull_request.number is unique to the PR.
---
 .github/workflows/semantic-pr.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/semantic-pr.yml b/.github/workflows/semantic-pr.yml
index 4df7324c4..57a4df646 100644
--- a/.github/workflows/semantic-pr.yml
+++ b/.github/workflows/semantic-pr.yml
@@ -11,7 +11,7 @@ on:
       - synchronize
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
   cancel-in-progress: true
 
 permissions: