diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml
index 5a9e2ae4f..2ccaa21aa 100644
--- a/benchmarking/k8s-benchmark/stack_run_config.yaml
+++ b/benchmarking/k8s-benchmark/stack_run_config.yaml
@@ -95,25 +95,34 @@ providers:
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: llamastack_kvstore
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
+    embedding_dimension: 768
+  model_id: nomic-embed-text-v1.5
   provider_id: sentence-transformers
   model_type: embedding
 - model_id: ${env.INFERENCE_MODEL}
diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py
index df623934b..b67c9e7a0 100644
--- a/llama_stack/providers/remote/inference/nvidia/config.py
+++ b/llama_stack/providers/remote/inference/nvidia/config.py
@@ -7,7 +7,7 @@
 import os
 from typing import Any
 
-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, SecretStr, field_validator
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.schema_utils import json_schema_type
@@ -48,7 +48,7 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
         description="A base url for accessing the NVIDIA NIM",
     )
     api_key: SecretStr | None = Field(
-        default_factory=lambda: SecretStr(os.getenv("NVIDIA_API_KEY")),
+        default=None,
         description="The NVIDIA API key, only needed of using the hosted service",
     )
     timeout: int = Field(
@@ -60,6 +60,22 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
         description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
     )
 
+    @field_validator("api_key", mode="before")
+    @classmethod
+    def _default_api_key_from_env(cls, value: SecretStr | str | None) -> SecretStr | None:
+        """Populate the API key from the NVIDIA_API_KEY environment variable when absent."""
+        if value is None:
+            env_value = os.getenv("NVIDIA_API_KEY")
+            return SecretStr(env_value) if env_value else None
+
+        if isinstance(value, SecretStr):
+            return value
+
+        if isinstance(value, str):
+            return SecretStr(value)
+
+        return value
+
     @classmethod
     def sample_run_config(
         cls,
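Reviewer note on the NVIDIAConfig change above: a minimal sketch (not part of the patch) of how the new before-mode validator behaves; the key values are placeholders, and it assumes the other NVIDIAConfig fields keep their defaults. One subtlety worth flagging: because the field now uses a plain default=None, Pydantic does not run validators on the default, so the env fallback fires for explicitly supplied values (e.g. api_key: None coming from a resolved run config), and the old default_factory's failure mode of wrapping a missing key as SecretStr(None) goes away.

    # Sketch (illustrative only): exercising the new api_key validator.
    import os

    from pydantic import SecretStr

    from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig

    os.environ["NVIDIA_API_KEY"] = "nvapi-placeholder"

    # An explicitly supplied None is backfilled from NVIDIA_API_KEY.
    assert NVIDIAConfig(api_key=None).api_key == SecretStr("nvapi-placeholder")

    # Plain strings are coerced to SecretStr; SecretStr values pass through unchanged.
    assert NVIDIAConfig(api_key="nvapi-explicit").api_key == SecretStr("nvapi-explicit")

    # With the variable unset, a missing key stays None rather than SecretStr(None).
    del os.environ["NVIDIA_API_KEY"]
    assert NVIDIAConfig(api_key=None).api_key is None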
diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py
index 6d5968f82..32a629131 100644
--- a/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -23,7 +23,6 @@ class RunpodInferenceAdapter(OpenAIMixin):
     """
 
     config: RunpodImplConfig
-    provider_data_api_key_field: str = "runpod_api_token"
 
     def get_api_key(self) -> str:
diff --git a/pyproject.toml b/pyproject.toml
index 63108349b..8eb7be4e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ required-version = ">=0.7.0"
 
 [project]
 name = "llama_stack"
-version = "0.2.23"
+version = "0.3.0"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "Llama Stack"
 readme = "README.md"
@@ -25,17 +25,17 @@ classifiers = [
 ]
 dependencies = [
     "aiohttp",
-    "fastapi>=0.115.0,<1.0", # server
-    "fire", # for MCP in LLS client
+    "databricks-sdk",
+    "fastapi>=0.115.0,<1.0",  # server
+    "fire",  # for MCP in LLS client
     "httpx",
-    "huggingface-hub>=0.34.0,<1.0",
     "jinja2>=3.1.6",
     "jsonschema",
-    "llama-stack-client>=0.2.23",
-    "openai>=1.107", # for expires_after support
+    "llama-stack-client>=0.3.0",
+    "openai>=1.107",  # for expires_after support
     "prompt-toolkit",
     "python-dotenv",
-    "python-jose[cryptography]",
+    "pyjwt[crypto]>=2.10.0",  # Pull crypto to support RS256 for jwt. Requires 2.10.0+ for ssl_context support.
     "pydantic>=2.11.9",
     "rich",
     "starlette",
@@ -43,20 +43,20 @@ dependencies = [
     "tiktoken",
     "pillow",
     "h11>=0.16.0",
-    "python-multipart>=0.0.20", # For fastapi Form
-    "uvicorn>=0.34.0", # server
-    "opentelemetry-sdk>=1.30.0", # server
+    "python-multipart>=0.0.20",  # For fastapi Form
+    "uvicorn>=0.34.0",  # server
+    "opentelemetry-sdk>=1.30.0",  # server
     "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server
-    "aiosqlite>=0.21.0", # server - for metadata store
-    "asyncpg", # for metadata store
-    "sqlalchemy[asyncio]>=2.0.41", # server - for conversations
+    "aiosqlite>=0.21.0",  # server - for metadata store
+    "asyncpg",  # for metadata store
+    "sqlalchemy[asyncio]>=2.0.41",  # server - for conversations
 ]
 
 [project.optional-dependencies]
 ui = [
     "streamlit",
     "pandas",
-    "llama-stack-client>=0.2.23",
+    "llama-stack-client>=0.3.0",
     "streamlit-option-menu",
 ]
 
@@ -68,14 +68,14 @@ dev = [
     "pytest-cov",
     "pytest-html",
     "pytest-json-report",
-    "pytest-socket", # For blocking network access in unit tests
-    "nbval", # For notebook testing
+    "pytest-socket",  # For blocking network access in unit tests
+    "nbval",  # For notebook testing
     "black",
     "ruff",
     "types-requests",
     "types-setuptools",
     "pre-commit",
-    "ruamel.yaml", # needed for openapi generator
+    "ruamel.yaml",  # needed for openapi generator
 ]
 # These are the dependencies required for running unit tests.
 unit = [
@@ -122,6 +122,8 @@ test = [
     "sqlalchemy",
     "sqlalchemy[asyncio]>=2.0.41",
     "requests",
+    "chromadb>=1.0.15",
+    "qdrant-client",
     "pymilvus>=2.6.1",
     "milvus-lite>=2.5.0",
     "weaviate-client>=4.16.4",
@@ -146,9 +148,7 @@ docs = [
     "requests",
 ]
 codegen = ["rich", "pydantic>=2.11.9", "jinja2>=3.1.6"]
-benchmark = [
-    "locust>=2.39.1",
-]
+benchmark = ["locust>=2.39.1"]
 
 [project.urls]
 Homepage = "https://github.com/llamastack/llama-stack"
@@ -247,7 +247,6 @@ follow_imports = "silent"
 # to exclude the entire directory.
 exclude = [
     # As we fix more and more of these, we should remove them from the list
-    "^llama_stack/cli/download\\.py$",
     "^llama_stack.core/build\\.py$",
     "^llama_stack.core/client\\.py$",
     "^llama_stack.core/request_headers\\.py$",
@@ -337,6 +336,5 @@ classmethod-decorators = ["classmethod", "pydantic.field_validator"]
 [tool.pytest.ini_options]
 addopts = ["--durations=10"]
 asyncio_mode = "auto"
-markers = [
-    "allow_network: Allow network access for specific unit tests",
-]
+markers = ["allow_network: Allow network access for specific unit tests"]
+filterwarnings = "ignore::DeprecationWarning"
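Reviewer note on the python-jose -> pyjwt swap in dependencies: a self-contained sketch (illustrative only, not code from this PR) of the RS256 round trip that the pyjwt[crypto] extra enables; the generated key and claim are throwaway placeholders.

    # Sketch: RS256 sign/verify via PyJWT with the `cryptography` backend.
    import jwt
    from cryptography.hazmat.primitives.asymmetric import rsa

    # Throwaway RSA key pair; real deployments verify against the IdP's public key.
    private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)

    token = jwt.encode({"sub": "llama-stack-user"}, private_key, algorithm="RS256")
    claims = jwt.decode(token, private_key.public_key(), algorithms=["RS256"])
    assert claims["sub"] == "llama-stack-user"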