diff --git a/.github/workflows/semantic-pr.yml b/.github/workflows/semantic-pr.yml index 4df7324c4..57a4df646 100644 --- a/.github/workflows/semantic-pr.yml +++ b/.github/workflows/semantic-pr.yml @@ -11,7 +11,7 @@ on: - synchronize concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} cancel-in-progress: true permissions: diff --git a/llama_stack/distributions/dell/dell.py b/llama_stack/distributions/dell/dell.py index b561ea00e..e3bf0ee03 100644 --- a/llama_stack/distributions/dell/dell.py +++ b/llama_stack/distributions/dell/dell.py @@ -16,6 +16,7 @@ from llama_stack.distributions.template import DistributionTemplate, RunConfigSe from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) +from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig def get_distribution_template() -> DistributionTemplate: @@ -71,9 +72,10 @@ def get_distribution_template() -> DistributionTemplate: chromadb_provider = Provider( provider_id="chromadb", provider_type="remote::chromadb", - config={ - "url": "${env.CHROMA_URL}", - }, + config=ChromaVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}/", + url="${env.CHROMADB_URL:=}", + ), ) inference_model = ModelInput( diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml index ecc6729eb..d89c92aa1 100644 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ b/llama_stack/distributions/dell/run-with-safety.yaml @@ -26,7 +26,10 @@ providers: - provider_id: chromadb provider_type: remote::chromadb config: - url: ${env.CHROMA_URL} + url: ${env.CHROMADB_URL:=} + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db safety: - provider_id: llama-guard provider_type: inline::llama-guard diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml index fc2553526..7397410ba 100644 --- a/llama_stack/distributions/dell/run.yaml +++ b/llama_stack/distributions/dell/run.yaml @@ -22,7 +22,10 @@ providers: - provider_id: chromadb provider_type: remote::chromadb config: - url: ${env.CHROMA_URL} + url: ${env.CHROMADB_URL:=} + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db safety: - provider_id: llama-guard provider_type: inline::llama-guard diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index a5bb079ef..323831845 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -308,9 +308,7 @@ class TGIAdapter(_HfAdapter): if not config.url: raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.") log.info(f"Initializing TGI client with url={config.url}") - self.client = AsyncInferenceClient( - model=config.url, - ) + self.client = AsyncInferenceClient(model=config.url, provider="hf-inference") endpoint_info = await self.client.get_endpoint_info() self.max_tokens = endpoint_info["max_total_tokens"] self.model_id = endpoint_info["model_id"]