diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py
index a645441e2..297fdae51 100644
--- a/llama_stack/templates/hf-endpoint/hf_endpoint.py
+++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py
@@ -69,7 +69,8 @@ def get_distribution_template() -> DistributionTemplate:
                                 endpoint_name="${env.SAFETY_INFERENCE_ENDPOINT_NAME}",
                             ),
                         ),
-                    ]
+                    ],
+                    "memory": [memory_provider],
                 },
                 default_models=[
                     inference_model,
diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
index 88ee0f8b5..bd625ffc5 100644
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
@@ -31,12 +31,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py
index d1d12a3b2..835495bb9 100644
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ b/llama_stack/templates/hf-serverless/hf_serverless.py
@@ -70,7 +70,8 @@ def get_distribution_template() -> DistributionTemplate:
                                 repo="${env.SAFETY_MODEL}",
                             ),
                         ),
-                    ]
+                    ],
+                    "memory": [memory_provider],
                 },
                 default_models=[
                     inference_model,
diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml
index a959c4310..f5ead14d4 100644
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml
@@ -31,12 +31,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py
index 649234e46..0aff9f39c 100644
--- a/llama_stack/templates/meta-reference-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py
@@ -77,6 +77,7 @@ def get_distribution_template() -> DistributionTemplate:
                             ),
                         ),
                     ],
+                    "memory": [memory_provider],
                 },
                 default_models=[
                     inference_model,
diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
index 6c1788841..d0fa05e96 100644
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@@ -33,12 +33,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index 3942d77a1..c24dfa6e9 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -22,12 +23,17 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "ollama"
     inference_provider = Provider(
         provider_id="ollama",
         provider_type="remote::ollama",
         config=OllamaImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -39,7 +45,7 @@
     )
 
     return DistributionTemplate(
-        name="ollama",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Ollama server for running LLM inference",
         docker_image=None,
@@ -50,6 +56,7 @@
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
@@ -57,7 +64,8 @@
                 provider_overrides={
                     "inference": [
                         inference_provider,
-                    ]
+                    ],
+                    "memory": [memory_provider],
                 },
                 default_models=[
                     inference_model,
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 86c2694a7..dc282f996 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -25,12 +25,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index cd9c8b931..ab8e12839 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -25,12 +25,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 8059b3237..c0849e2d0 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -30,12 +30,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index 68ab8d348..f5ccfcf16 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -70,6 +70,7 @@ def get_distribution_template() -> DistributionTemplate:
                             ),
                         ),
                     ],
+                    "memory": [memory_provider],
                 },
                 default_models=[
                     inference_model,
diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml
index 06df6d8ec..2ee82ddc3 100644
--- a/llama_stack/templates/tgi/run-with-safety.yaml
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@@ -29,12 +29,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py
index edba61804..83818a598 100644
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@@ -74,6 +74,7 @@ def get_distribution_template() -> DistributionTemplate:
                             ),
                         ),
                     ],
+                    "memory": [memory_provider],
                 },
                 default_models=[
                     inference_model,
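
Note: the hunks for hf_endpoint.py, hf_serverless.py, meta_reference.py, vllm.py, and tgi.py reference a memory_provider whose definition falls outside the visible diff context. A minimal sketch of what that definition presumably looks like in each template module, mirroring the ollama.py change shown above (the per-template name value here is an assumption, not part of this diff):

    from llama_stack.distribution.datatypes import Provider
    from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig

    # Hypothetical reconstruction for e.g. the tgi template; the hunk that
    # actually defines memory_provider is not included in this diff.
    name = "tgi"
    memory_provider = Provider(
        provider_id="faiss",
        provider_type="inline::faiss",
        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
    )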