Merge branch 'main' into nvidia-e2e-notebook

2025-07-21 20:18:52 +00:00 · 2025-05-28 17:48:15 -04:00 · 2025-05-28 17:48:15 -04:00 · f5cb965f0f
commit f5cb965f0f
parent 51b68b4be6 0b695538af
226 changed files with 16519 additions and 8666 deletions
--- a/llama_stack/templates/bedrock/build.yaml
+++ b/llama_stack/templates/bedrock/build.yaml
@ -29,3 +29,5 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@ -35,6 +35,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -96,6 +99,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db
 models:
 - metadata: {}
  model_id: meta.llama3-1-8b-instruct-v1:0
--- a/llama_stack/templates/cerebras/build.yaml
+++ b/llama_stack/templates/cerebras/build.yaml
@ -29,3 +29,5 @@ distribution_spec:
    - remote::tavily-search
    - inline::rag-runtime
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@ -41,6 +41,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/responses_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -99,6 +102,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/inference_store.db
 models:
 - metadata: {}
  model_id: llama3.1-8b
--- a/llama_stack/templates/ci-tests/build.yaml
+++ b/llama_stack/templates/ci-tests/build.yaml
@ -30,3 +30,5 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/ci-tests/run.yaml
+++ b/llama_stack/templates/ci-tests/run.yaml
@ -38,6 +38,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -99,6 +102,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/inference_store.db
 models:
 - metadata: {}
  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
--- a/llama_stack/templates/dell/build.yaml
+++ b/llama_stack/templates/dell/build.yaml
@ -30,3 +30,6 @@ distribution_spec:
    - remote::tavily-search
    - inline::rag-runtime
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/dell/run-with-safety.yaml
+++ b/llama_stack/templates/dell/run-with-safety.yaml
@ -41,6 +41,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -99,6 +102,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/dell/run.yaml
+++ b/llama_stack/templates/dell/run.yaml
@ -37,6 +37,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -95,6 +98,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/dependencies.json
+++ b/llama_stack/templates/dependencies.json
@ -1,843 +0,0 @@
-{
-  "bedrock": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "boto3",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn"
-  ],
-  "cerebras": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "cerebras_cloud_sdk",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "ci-tests": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "fastapi",
-    "fire",
-    "fireworks-ai",
-    "httpx",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "sqlite-vec",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "dell": [
-    "aiohttp",
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "huggingface_hub",
-    "langdetect",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "fireworks": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "fireworks-ai",
-    "httpx",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "groq": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "litellm",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn"
-  ],
-  "hf-endpoint": [
-    "aiohttp",
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "huggingface_hub",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn"
-  ],
-  "hf-serverless": [
-    "aiohttp",
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "huggingface_hub",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "llama_api": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "litellm",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "sqlite-vec",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "meta-reference-gpu": [
-    "accelerate",
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "fairscale",
-    "faiss-cpu",
-    "fastapi",
-    "fbgemm-gpu-genai==1.1.2",
-    "fire",
-    "httpx",
-    "langdetect",
-    "lm-format-enforcer",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentence-transformers",
-    "sentencepiece",
-    "torch",
-    "torchao==0.8.0",
-    "torchvision",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "zmq"
-  ],
-  "nvidia": [
-    "aiohttp",
-    "aiosqlite",
-    "blobfile",
-    "chardet",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn"
-  ],
-  "ollama": [
-    "aiohttp",
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "ollama",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "peft",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "torch",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "trl",
-    "uvicorn"
-  ],
-  "open-benchmark": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "litellm",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "sqlite-vec",
-    "together",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn"
-  ],
-  "passthrough": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "remote-vllm": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "sambanova": [
-    "aiosqlite",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "litellm",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "starter": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "fastapi",
-    "fire",
-    "fireworks-ai",
-    "httpx",
-    "langdetect",
-    "litellm",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "sqlite-vec",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "tgi": [
-    "aiohttp",
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "huggingface_hub",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "together": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "together",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "verification": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "litellm",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "sqlite-vec",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "vllm-gpu": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "vllm",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "watsonx": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "datasets",
-    "emoji",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "ibm_watson_machine_learning",
-    "langdetect",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pymongo",
-    "pypdf",
-    "pythainlp",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "tree_sitter",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ]
-}
--- a/llama_stack/templates/fireworks/build.yaml
+++ b/llama_stack/templates/fireworks/build.yaml
@ -31,3 +31,6 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/fireworks/run-with-safety.yaml
+++ b/llama_stack/templates/fireworks/run-with-safety.yaml
@ -46,6 +46,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -111,6 +114,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db
 models:
 - metadata: {}
  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@ -41,6 +41,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -106,6 +109,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db
 models:
 - metadata: {}
  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
--- a/llama_stack/templates/groq/build.yaml
+++ b/llama_stack/templates/groq/build.yaml
@ -26,3 +26,5 @@ distribution_spec:
    - remote::tavily-search
    - inline::rag-runtime
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/groq/run.yaml
+++ b/llama_stack/templates/groq/run.yaml
@ -41,6 +41,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -99,6 +102,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/inference_store.db
 models:
 - metadata: {}
  model_id: groq/llama3-8b-8192
--- a/llama_stack/templates/hf-endpoint/build.yaml
+++ b/llama_stack/templates/hf-endpoint/build.yaml
@ -29,3 +29,6 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
@ -46,6 +46,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -107,6 +110,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@ -41,6 +41,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -102,6 +105,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/hf-serverless/build.yaml
+++ b/llama_stack/templates/hf-serverless/build.yaml
@ -30,3 +30,6 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml
@ -46,6 +46,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -107,6 +110,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@ -41,6 +41,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -102,6 +105,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/llama_api/build.yaml
+++ b/llama_stack/templates/llama_api/build.yaml
@ -30,3 +30,5 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/llama_api/run.yaml
+++ b/llama_stack/templates/llama_api/run.yaml
@ -50,6 +50,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -111,6 +114,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/inference_store.db
 models:
 - metadata: {}
  model_id: Llama-3.3-70B-Instruct
--- a/llama_stack/templates/meta-reference-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-gpu/build.yaml
@ -29,3 +29,6 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@ -56,6 +56,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -117,6 +120,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@ -46,6 +46,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -107,6 +110,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/nvidia/build.yaml
+++ b/llama_stack/templates/nvidia/build.yaml
@ -24,3 +24,6 @@ distribution_spec:
    tool_runtime:
    - inline::rag-runtime
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/nvidia/run-with-safety.yaml
+++ b/llama_stack/templates/nvidia/run-with-safety.yaml
@ -46,6 +46,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -92,6 +95,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@ -41,6 +41,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -80,6 +83,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db
 models:
 - metadata: {}
  model_id: meta/llama3-8b-instruct
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@ -32,3 +32,6 @@ distribution_spec:
    - remote::model-context-protocol
    - remote::wolfram-alpha
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@ -40,6 +40,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -112,6 +115,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@ -38,6 +38,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -110,6 +113,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/open-benchmark/build.yaml
+++ b/llama_stack/templates/open-benchmark/build.yaml
@ -33,3 +33,5 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/open-benchmark/run.yaml
+++ b/llama_stack/templates/open-benchmark/run.yaml
@ -64,6 +64,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -125,6 +128,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/inference_store.db
 models:
 - metadata: {}
  model_id: openai/gpt-4o
--- a/llama_stack/templates/passthrough/build.yaml
+++ b/llama_stack/templates/passthrough/build.yaml
@ -31,3 +31,6 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/passthrough/run-with-safety.yaml
+++ b/llama_stack/templates/passthrough/run-with-safety.yaml
@ -46,6 +46,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -111,6 +114,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db
 models:
 - metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
--- a/llama_stack/templates/passthrough/run.yaml
+++ b/llama_stack/templates/passthrough/run.yaml
@ -41,6 +41,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -106,6 +109,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db
 models:
 - metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
--- a/llama_stack/templates/remote-vllm/build.yaml
+++ b/llama_stack/templates/remote-vllm/build.yaml
@ -31,3 +31,6 @@ distribution_spec:
    - remote::model-context-protocol
    - remote::wolfram-alpha
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@ -50,6 +50,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -115,6 +118,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@ -43,6 +43,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -108,6 +111,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/sambanova/build.yaml
+++ b/llama_stack/templates/sambanova/build.yaml
@ -1,6 +1,6 @@
 version: '2'
 distribution_spec:
-  description: Use SambaNova for running LLM inference
+  description: Use SambaNova for running LLM inference and safety
  providers:
    inference:
    - remote::sambanova
@ -10,7 +10,7 @@ distribution_spec:
    - remote::chromadb
    - remote::pgvector
    safety:
-    - inline::llama-guard
+    - remote::sambanova
    agents:
    - inline::meta-reference
    telemetry:
@ -22,3 +22,5 @@ distribution_spec:
    - remote::model-context-protocol
    - remote::wolfram-alpha
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/sambanova/doc_template.md
+++ b/llama_stack/templates/sambanova/doc_template.md
@ -37,33 +37,44 @@ The following models are available by default:

 ### Prerequisite: API Keys

-Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/).
+Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](http://cloud.sambanova.ai?utm_source=llamastack&utm_medium=external&utm_campaign=cloud_signup).


 ## Running Llama Stack with SambaNova

 You can do this via Conda (build code) or Docker which has a pre-built image.

-### Via Docker

-This method allows you to get started quickly without having to build the distribution code.
+### Via Docker

 ```bash
 LLAMA_STACK_PORT=8321
+llama stack build --template sambanova --image-type container
 docker run \
  -it \
-  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  llamastack/distribution-{{ name }} \
+  -v ~/.llama:/root/.llama \
+  distribution-{{ name }} \
  --port $LLAMA_STACK_PORT \
  --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
 ```

+
+### Via Venv
+
+```bash
+llama stack build --template sambanova --image-type venv
+llama stack run --image-type venv ~/.llama/distributions/sambanova/sambanova-run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
+```
+
+
 ### Via Conda

 ```bash
 llama stack build --template sambanova --image-type conda
-llama stack run ./run.yaml \
+llama stack run --image-type conda ~/.llama/distributions/sambanova/sambanova-run.yaml \
  --port $LLAMA_STACK_PORT \
  --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
 ```
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@ -38,10 +38,11 @@ providers:
      user: ${env.PGVECTOR_USER:}
      password: ${env.PGVECTOR_PASSWORD:}
  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
+  - provider_id: sambanova
+    provider_type: remote::sambanova
    config:
-      excluded_categories: []
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -50,6 +51,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -81,6 +85,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/inference_store.db
 models:
 - metadata: {}
  model_id: sambanova/Meta-Llama-3.1-8B-Instruct
@ -189,6 +196,9 @@ models:
  model_type: embedding
 shields:
 - shield_id: meta-llama/Llama-Guard-3-8B
+  provider_shield_id: sambanova/Meta-Llama-Guard-3-8B
+- shield_id: sambanova/Meta-Llama-Guard-3-8B
+  provider_shield_id: sambanova/Meta-Llama-Guard-3-8B
 vector_dbs: []
 datasets: []
 scoring_fns: []
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@ -34,7 +34,7 @@ def get_distribution_template() -> DistributionTemplate:
    providers = {
        "inference": ["remote::sambanova", "inline::sentence-transformers"],
        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
-        "safety": ["inline::llama-guard"],
+        "safety": ["remote::sambanova"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "tool_runtime": [
@ -110,7 +110,7 @@ def get_distribution_template() -> DistributionTemplate:
    return DistributionTemplate(
        name=name,
        distro_type="self_hosted",
-        description="Use SambaNova for running LLM inference",
+        description="Use SambaNova for running LLM inference and safety",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
@ -122,7 +122,15 @@ def get_distribution_template() -> DistributionTemplate:
                    "vector_io": vector_io_providers,
                },
                default_models=default_models + [embedding_model],
-                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
+                default_shields=[
+                    ShieldInput(
+                        shield_id="meta-llama/Llama-Guard-3-8B", provider_shield_id="sambanova/Meta-Llama-Guard-3-8B"
+                    ),
+                    ShieldInput(
+                        shield_id="sambanova/Meta-Llama-Guard-3-8B",
+                        provider_shield_id="sambanova/Meta-Llama-Guard-3-8B",
+                    ),
+                ],
                default_tool_groups=default_tool_groups,
            ),
        },
--- a/llama_stack/templates/starter/build.yaml
+++ b/llama_stack/templates/starter/build.yaml
@ -35,3 +35,5 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/starter/run.yaml
+++ b/llama_stack/templates/starter/run.yaml
@ -72,6 +72,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -133,6 +136,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/inference_store.db
 models:
 - metadata: {}
  model_id: openai/gpt-4o
--- a/llama_stack/templates/template.py
+++ b/llama_stack/templates/template.py
@ -29,6 +29,7 @@ from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
+from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig


 def get_model_registry(
@ -117,6 +118,10 @@ class RunConfigSettings(BaseModel):
                __distro_dir__=f"~/.llama/distributions/{name}",
                db_name="registry.db",
            ),
+            inference_store=SqliteSqlStoreConfig.sample_run_config(
+                __distro_dir__=f"~/.llama/distributions/{name}",
+                db_name="inference_store.db",
+            ),
            models=self.default_models or [],
            shields=self.default_shields or [],
            tool_groups=self.default_tool_groups or [],
@ -146,14 +151,20 @@ class DistributionTemplate(BaseModel):
    available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None

    def build_config(self) -> BuildConfig:
+        additional_pip_packages: list[str] = []
+        for run_config in self.run_configs.values():
+            run_config_ = run_config.run_config(self.name, self.providers, self.container_image)
+            if run_config_.inference_store:
+                additional_pip_packages.extend(run_config_.inference_store.pip_packages)
+
        return BuildConfig(
-            name=self.name,
            distribution_spec=DistributionSpec(
                description=self.description,
                container_image=self.container_image,
                providers=self.providers,
            ),
            image_type="conda",  # default to conda, can be overridden
+            additional_pip_packages=additional_pip_packages,
        )

    def generate_markdown_docs(self) -> str:
--- a/llama_stack/templates/tgi/build.yaml
+++ b/llama_stack/templates/tgi/build.yaml
@ -30,3 +30,6 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/tgi/run-with-safety.yaml
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@ -41,6 +41,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -102,6 +105,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@ -40,6 +40,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -101,6 +104,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/together/build.yaml
+++ b/llama_stack/templates/together/build.yaml
@ -31,3 +31,6 @@ distribution_spec:
    - remote::model-context-protocol
    - remote::wolfram-alpha
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@ -46,6 +46,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -111,6 +114,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db
 models:
 - metadata: {}
  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@ -41,6 +41,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -106,6 +109,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db
 models:
 - metadata: {}
  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
--- a/llama_stack/templates/verification/build.yaml
+++ b/llama_stack/templates/verification/build.yaml
@ -35,3 +35,5 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/verification/run.yaml
+++ b/llama_stack/templates/verification/run.yaml
@ -74,6 +74,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -135,6 +138,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/inference_store.db
 models:
 - metadata: {}
  model_id: openai/gpt-4o
--- a/llama_stack/templates/vllm-gpu/build.yaml
+++ b/llama_stack/templates/vllm-gpu/build.yaml
@ -30,3 +30,5 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@ -45,6 +45,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -106,6 +109,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/inference_store.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
--- a/llama_stack/templates/watsonx/build.yaml
+++ b/llama_stack/templates/watsonx/build.yaml
@ -28,3 +28,5 @@ distribution_spec:
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/watsonx/run.yaml
+++ b/llama_stack/templates/watsonx/run.yaml
@ -42,6 +42,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -103,6 +106,9 @@ providers:
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/inference_store.db
 models:
 - metadata: {}
  model_id: meta-llama/llama-3-3-70b-instruct