Mirror of https://github.com/meta-llama/llama-stack.git (synced 2026-01-01 01:00:00 +00:00)

Commit f5edd07b29: Merge branch 'main' into patch-1
91 changed files with 995 additions and 632 deletions
@@ -6,54 +6,65 @@
from typing import Literal, Union

-from llama_models.schema_utils import register_schema
+from llama_models.schema_utils import json_schema_type, register_schema
from pydantic import BaseModel, Field
from typing_extensions import Annotated


+@json_schema_type
class StringType(BaseModel):
    type: Literal["string"] = "string"


+@json_schema_type
class NumberType(BaseModel):
    type: Literal["number"] = "number"


+@json_schema_type
class BooleanType(BaseModel):
    type: Literal["boolean"] = "boolean"


+@json_schema_type
class ArrayType(BaseModel):
    type: Literal["array"] = "array"


+@json_schema_type
class ObjectType(BaseModel):
    type: Literal["object"] = "object"


+@json_schema_type
class JsonType(BaseModel):
    type: Literal["json"] = "json"


+@json_schema_type
class UnionType(BaseModel):
    type: Literal["union"] = "union"


+@json_schema_type
class ChatCompletionInputType(BaseModel):
    # expects List[Message] for messages
    type: Literal["chat_completion_input"] = "chat_completion_input"


+@json_schema_type
class CompletionInputType(BaseModel):
    # expects InterleavedTextMedia for content
    type: Literal["completion_input"] = "completion_input"


+@json_schema_type
class AgentTurnInputType(BaseModel):
    # expects List[Message] for messages (may also include attachments?)
    type: Literal["agent_turn_input"] = "agent_turn_input"


+@json_schema_type
class DialogType(BaseModel):
    # expects List[Message] for messages
    # this type semantically contains the output label whereas ChatCompletionInputType does not
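Note (illustration, not part of the diff): each of these models carries a Literal "type" tag, the usual Pydantic pattern for a discriminated union. A minimal sketch of how such tagged models are typically combined is shown below; the union name and its two members are assumptions for illustration, since the actual union definition is truncated in this hunk.

    # Illustrative sketch only.
    from typing import Literal, Union

    from pydantic import BaseModel, Field
    from typing_extensions import Annotated

    class StringType(BaseModel):
        type: Literal["string"] = "string"

    class NumberType(BaseModel):
        type: Literal["number"] = "number"

    # "type" acts as the discriminator, so {"type": "number"} parses as NumberType.
    ParamType = Annotated[Union[StringType, NumberType], Field(discriminator="type")]

    class Parameter(BaseModel):
        param_type: ParamType

    print(Parameter(param_type={"type": "number"}).param_type)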
@@ -182,8 +182,8 @@ def _generate_run_config(
    """
    apis = list(build_config.distribution_spec.providers.keys())
    run_config = StackRunConfig(
-        docker_image=(
-            image_name if build_config.image_type == ImageType.docker.value else None
+        container_image=(
+            image_name if build_config.image_type == ImageType.container.value else None
        ),
        image_name=image_name,
        apis=apis,

@@ -238,7 +238,7 @@ def _run_stack_build_command_from_build_config(
    image_name: Optional[str] = None,
    template_name: Optional[str] = None,
) -> None:
-    if build_config.image_type == ImageType.docker.value:
+    if build_config.image_type == ImageType.container.value:
        if template_name:
            image_name = f"distribution-{template_name}"
        else:
@@ -47,8 +47,8 @@ class StackBuild(Subcommand):
        self.parser.add_argument(
            "--image-type",
            type=str,
-            help="Image Type to use for the build. This can be either conda or docker. If not specified, will use the image type from the template config.",
-            choices=["conda", "docker", "venv"],
+            help="Image Type to use for the build. This can be either conda or container or venv. If not specified, will use the image type from the template config.",
+            choices=["conda", "container", "venv"],
            default="conda",
        )
@@ -27,7 +27,7 @@ class StackConfigure(Subcommand):
        self.parser.add_argument(
            "config",
            type=str,
-            help="Path to the build config file (e.g. ~/.llama/builds/<image_type>/<name>-build.yaml). For docker, this could also be the name of the docker image. ",
+            help="Path to the build config file (e.g. ~/.llama/builds/<image_type>/<name>-build.yaml). For container, this could also be the name of the container image. ",
        )

        self.parser.add_argument(
@@ -92,9 +92,9 @@ class StackRun(Subcommand):
        )

        if not config_file.exists() and not has_yaml_suffix:
-            # check if it's a build config saved to docker dir
+            # check if it's a build config saved to container dir
            config_file = Path(
-                BUILDS_BASE_DIR / ImageType.docker.value / f"{args.config}-run.yaml"
+                BUILDS_BASE_DIR / ImageType.container.value / f"{args.config}-run.yaml"
            )

        if not config_file.exists() and not has_yaml_suffix:

@@ -115,12 +115,12 @@ class StackRun(Subcommand):
        config_dict = yaml.safe_load(config_file.read_text())
        config = parse_and_maybe_upgrade_config(config_dict)

-        if config.docker_image:
+        if config.container_image:
            script = (
                importlib.resources.files("llama_stack")
                / "distribution/start_container.sh"
            )
-            run_args = [script, config.docker_image]
+            run_args = [script, config.container_image]
        else:
            current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
            image_name = args.image_name or current_conda_env
@@ -38,7 +38,7 @@ SERVER_DEPENDENCIES = [


class ImageType(Enum):
-    docker = "docker"
+    container = "container"
    conda = "conda"
    venv = "venv"


@@ -77,8 +77,8 @@ def get_provider_dependencies(

        provider_spec = providers_for_api[provider_type]
        deps.extend(provider_spec.pip_packages)
-        if provider_spec.docker_image:
-            raise ValueError("A stack's dependencies cannot have a docker image")
+        if provider_spec.container_image:
+            raise ValueError("A stack's dependencies cannot have a container image")

    normal_deps = []
    special_deps = []

@@ -109,23 +109,25 @@ def build_image(
    image_name: str,
    template_name: Optional[str] = None,
):
-    docker_image = build_config.distribution_spec.docker_image or "python:3.10-slim"
+    container_image = (
+        build_config.distribution_spec.container_image or "python:3.10-slim"
+    )

    normal_deps, special_deps = get_provider_dependencies(
        build_config.distribution_spec.providers
    )
    normal_deps += SERVER_DEPENDENCIES

-    if build_config.image_type == ImageType.docker.value:
+    if build_config.image_type == ImageType.container.value:
        script = str(
            importlib.resources.files("llama_stack") / "distribution/build_container.sh"
        )
        args = [
            script,
            image_name,
-            docker_image,
+            container_image,
            str(build_file_path),
-            str(BUILDS_BASE_DIR / ImageType.docker.value),
+            str(BUILDS_BASE_DIR / ImageType.container.value),
            " ".join(normal_deps),
        ]
    elif build_config.image_type == ImageType.conda.value:
@@ -13,7 +13,7 @@ PYPI_VERSION=${PYPI_VERSION:-}
BUILD_PLATFORM=${BUILD_PLATFORM:-}

if [ "$#" -lt 4 ]; then
-  echo "Usage: $0 <build_name> <docker_base> <pip_dependencies> [<special_pip_deps>]" >&2
+  echo "Usage: $0 <build_name> <container_base> <pip_dependencies> [<special_pip_deps>]" >&2
  echo "Example: $0 my-fastapi-app python:3.9-slim 'fastapi uvicorn' " >&2
  exit 1
fi

@@ -24,7 +24,7 @@ set -euo pipefail

build_name="$1"
image_name="distribution-$build_name"
-docker_base=$2
+container_base=$2
build_file_path=$3
host_build_dir=$4
pip_dependencies=$5

@@ -36,14 +36,14 @@ NC='\033[0m' # No Color

SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
REPO_DIR=$(dirname $(dirname "$SCRIPT_DIR"))
-DOCKER_BINARY=${DOCKER_BINARY:-docker}
-DOCKER_OPTS=${DOCKER_OPTS:-}
+CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
+CONTAINER_OPTS=${CONTAINER_OPTS:-}

TEMP_DIR=$(mktemp -d)

-add_to_docker() {
+add_to_container() {
  local input
-  output_file="$TEMP_DIR/Dockerfile"
+  output_file="$TEMP_DIR/Containerfile"
  if [ -t 0 ]; then
    printf '%s\n' "$1" >>"$output_file"
  else

@@ -53,9 +53,9 @@ add_to_docker() {
}

# Update and install UBI9 components if UBI9 base image is used
-if [[ $docker_base == *"registry.access.redhat.com/ubi9"* ]]; then
-  add_to_docker << EOF
-FROM $docker_base
+if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
+  add_to_container << EOF
+FROM $container_base
WORKDIR /app

RUN microdnf -y update && microdnf install -y iputils net-tools wget \

@@ -64,8 +64,8 @@ RUN microdnf -y update && microdnf install -y iputils net-tools wget \

EOF
else
-  add_to_docker << EOF
-FROM $docker_base
+  add_to_container << EOF
+FROM $container_base
WORKDIR /app

RUN apt-get update && apt-get install -y \

@@ -82,7 +82,7 @@ fi
# Add pip dependencies first since llama-stack is what will change most often
# so we can reuse layers.
if [ -n "$pip_dependencies" ]; then
-  add_to_docker << EOF
+  add_to_container << EOF
RUN pip install --no-cache $pip_dependencies
EOF
fi

@@ -90,7 +90,7 @@ fi
if [ -n "$special_pip_deps" ]; then
  IFS='#' read -ra parts <<<"$special_pip_deps"
  for part in "${parts[@]}"; do
-    add_to_docker <<EOF
+    add_to_container <<EOF
RUN pip install --no-cache $part
EOF
  done

@@ -108,16 +108,16 @@ if [ -n "$LLAMA_STACK_DIR" ]; then
  # Install in editable format. We will mount the source code into the container
  # so that changes will be reflected in the container without having to do a
  # rebuild. This is just for development convenience.
-  add_to_docker << EOF
+  add_to_container << EOF
RUN pip install --no-cache -e $stack_mount
EOF
else
  if [ -n "$TEST_PYPI_VERSION" ]; then
    # these packages are damaged in test-pypi, so install them first
-    add_to_docker << EOF
+    add_to_container << EOF
RUN pip install fastapi libcst
EOF
-    add_to_docker << EOF
+    add_to_container << EOF
RUN pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
  llama-models==$TEST_PYPI_VERSION llama-stack-client==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION

@@ -128,7 +128,7 @@ EOF
    else
      SPEC_VERSION="llama-stack"
    fi
-    add_to_docker << EOF
+    add_to_container << EOF
RUN pip install --no-cache $SPEC_VERSION
EOF
  fi

@@ -140,14 +140,14 @@ if [ -n "$LLAMA_MODELS_DIR" ]; then
    exit 1
  fi

-  add_to_docker << EOF
+  add_to_container << EOF
RUN pip uninstall -y llama-models
RUN pip install --no-cache $models_mount

EOF
fi

-add_to_docker << EOF
+add_to_container << EOF

# This would be good in production but for debugging flexibility lets not add it right now
# We need a more solid production ready entrypoint.sh anyway

@@ -156,8 +156,8 @@ ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--templat

EOF

-printf "Dockerfile created successfully in $TEMP_DIR/Dockerfile\n\n"
-cat $TEMP_DIR/Dockerfile
+printf "Containerfile created successfully in $TEMP_DIR/Containerfile\n\n"
+cat $TEMP_DIR/Containerfile
printf "\n"

mounts=""

@@ -170,7 +170,7 @@ fi

if command -v selinuxenabled &>/dev/null && selinuxenabled; then
  # Disable SELinux labels -- we don't want to relabel the llama-stack source dir
-  DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable"
+  CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi

# Set version tag based on PyPI version

@@ -200,7 +200,7 @@ else
fi

set -x
-$DOCKER_BINARY build $DOCKER_OPTS $PLATFORM -t $image_tag -f "$TEMP_DIR/Dockerfile" "$REPO_DIR" $mounts
+$CONTAINER_BINARY build $CONTAINER_OPTS $PLATFORM -t $image_tag -f "$TEMP_DIR/Containerfile" "$REPO_DIR" $mounts

# clean up tmp/configs
set +x
@@ -6,8 +6,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-DOCKER_BINARY=${DOCKER_BINARY:-docker}
-DOCKER_OPTS=${DOCKER_OPTS:-}
+CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
+CONTAINER_OPTS=${CONTAINER_OPTS:-}
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}

set -euo pipefail

@@ -24,13 +24,13 @@ if [ $# -lt 2 ]; then
  exit 1
fi

-docker_image="$1"
+container_image="$1"
host_build_dir="$2"
container_build_dir="/app/builds"

if command -v selinuxenabled &> /dev/null && selinuxenabled; then
  # Disable SELinux labels
-  DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable"
+  CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi

mounts=""

@@ -39,9 +39,9 @@ if [ -n "$LLAMA_STACK_DIR" ]; then
fi

set -x
-$DOCKER_BINARY run $DOCKER_OPTS -it \
+$CONTAINER_BINARY run $CONTAINER_OPTS -it \
  --entrypoint "/usr/local/bin/llama" \
  -v $host_build_dir:$container_build_dir \
  $mounts \
-  $docker_image \
+  $container_image \
  stack configure ./llamastack-build.yaml --output-dir $container_build_dir
@@ -73,7 +73,7 @@ class AutoRoutedProviderSpec(ProviderSpec):
    provider_type: str = "router"
    config_class: str = ""

-    docker_image: Optional[str] = None
+    container_image: Optional[str] = None
    routing_table_api: Api
    module: str
    provider_data_validator: Optional[str] = Field(

@@ -89,7+89,7 @@ class AutoRoutedProviderSpec(ProviderSpec):
class RoutingTableProviderSpec(ProviderSpec):
    provider_type: str = "routing_table"
    config_class: str = ""
-    docker_image: Optional[str] = None
+    container_image: Optional[str] = None

    router_api: Api
    module: str

@@ -101,7 +101,7 @@ class DistributionSpec(BaseModel):
        default="",
        description="Description of the distribution",
    )
-    docker_image: Optional[str] = None
+    container_image: Optional[str] = None
    providers: Dict[str, Union[str, List[str]]] = Field(
        default_factory=dict,
        description="""

@@ -127,9 +127,9 @@ Reference to the distribution this package refers to. For unregistered (adhoc) p
this could be just a hash
    """,
    )
-    docker_image: Optional[str] = Field(
+    container_image: Optional[str] = Field(
        default=None,
-        description="Reference to the docker image if this package refers to a container",
+        description="Reference to the container image if this package refers to a container",
    )
    apis: List[str] = Field(
        default_factory=list,

@@ -168,5 +168,5 @@ class BuildConfig(BaseModel):
    )
    image_type: str = Field(
        default="conda",
-        description="Type of package to build (conda | docker | venv)",
+        description="Type of package to build (conda | container | venv)",
    )
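Note (illustration, not part of the diff): for orientation, a minimal sketch of the renamed field on the config side is shown below. The class names and the reduced field set are stand-ins, not the full models from this file.

    # Illustrative stand-ins only.
    from typing import Optional

    from pydantic import BaseModel, Field

    class DistributionSpecLike(BaseModel):
        description: str = ""
        container_image: Optional[str] = None  # was docker_image before this change

    class BuildConfigLike(BaseModel):
        image_type: str = Field(
            default="conda",
            description="Type of package to build (conda | container | venv)",
        )

    print(BuildConfigLike(image_type="container"))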
@@ -145,7 +145,9 @@ async def resolve_impls(
            log.warning(
                f"Provider `{provider.provider_type}` for API `{api}` is deprecated and will be removed in a future release: {p.deprecation_warning}",
            )
-        p.deps__ = [a.value for a in p.api_dependencies]
+        p.deps__ = [a.value for a in p.api_dependencies] + [
+            a.value for a in p.optional_api_dependencies
+        ]
        spec = ProviderWithSpec(
            spec=p,
            **(provider.model_dump()),

@@ -229,6 +231,9 @@ async def resolve_impls(
    inner_impls_by_provider_id = {f"inner-{x.value}": {} for x in router_apis}
    for api_str, provider in sorted_providers:
        deps = {a: impls[a] for a in provider.spec.api_dependencies}
+        for a in provider.spec.optional_api_dependencies:
+            if a in impls:
+                deps[a] = impls[a]

        inner_impls = {}
        if isinstance(provider.spec, RoutingTableProviderSpec):

@@ -265,7 +270,7 @@ def topological_sort(
                deps.append(dep)

        for dep in deps:
-            if dep not in visited:
+            if dep not in visited and dep in providers_with_specs:
                dfs((dep, providers_with_specs[dep]), visited, stack)

        stack.append(api_str)
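Note (illustration, not part of the diff): a minimal standalone sketch of the guarded depth-first topological sort is shown below, with a simplified signature assumed for illustration. Skipping dependencies that have no configured provider is what keeps an unconfigured optional API from raising a KeyError.

    # Simplified sketch, not the resolver's actual implementation.
    from typing import Dict, List, Set

    def topological_sort(providers_with_specs: Dict[str, List[str]]) -> List[str]:
        def dfs(api: str, visited: Set[str], stack: List[str]) -> None:
            visited.add(api)
            for dep in providers_with_specs[api]:
                if dep not in visited and dep in providers_with_specs:
                    dfs(dep, visited, stack)
            stack.append(api)

        visited: Set[str] = set()
        stack: List[str] = []
        for api in providers_with_specs:
            if api not in visited:
                dfs(api, visited, stack)
        return stack

    # "telemetry" optionally depends on "datasetio", which is not configured here;
    # the extra guard simply skips it instead of failing.
    print(topological_sort({"inference": [], "telemetry": ["datasetio", "inference"]}))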
@@ -6,8 +6,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-DOCKER_BINARY=${DOCKER_BINARY:-docker}
-DOCKER_OPTS=${DOCKER_OPTS:-}
+CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
+CONTAINER_OPTS=${CONTAINER_OPTS:-}
LLAMA_CHECKPOINT_DIR=${LLAMA_CHECKPOINT_DIR:-}
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}

@@ -31,7 +31,7 @@ if [ $# -lt 3 ]; then
fi

build_name="$1"
-docker_image="localhost/distribution-$build_name"
+container_image="localhost/distribution-$build_name"
shift

yaml_config="$1"

@@ -64,7 +64,7 @@ set -x

if command -v selinuxenabled &> /dev/null && selinuxenabled; then
  # Disable SELinux labels
-  DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable"
+  CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi

mounts=""

@@ -73,7 +73,7 @@ if [ -n "$LLAMA_STACK_DIR" ]; then
fi
if [ -n "$LLAMA_CHECKPOINT_DIR" ]; then
  mounts="$mounts -v $LLAMA_CHECKPOINT_DIR:/root/.llama"
-  DOCKER_OPTS="$DOCKER_OPTS --gpus=all"
+  CONTAINER_OPTS="$CONTAINER_OPTS --gpus=all"
fi

version_tag="latest"

@@ -85,11 +85,11 @@ elif [ -n "$TEST_PYPI_VERSION" ]; then
  version_tag="test-$TEST_PYPI_VERSION"
fi

-$DOCKER_BINARY run $DOCKER_OPTS -it \
+$CONTAINER_BINARY run $CONTAINER_OPTS -it \
  -p $port:$port \
  $env_vars \
  -v "$yaml_config:/app/config.yaml" \
  $mounts \
  --env LLAMA_STACK_PORT=$port \
  --entrypoint='["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]' \
-  $docker_image:$version_tag
+  $container_image:$version_tag
@@ -14,6 +14,6 @@ def datasets():
    datasets_info = {
        d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()
    }

-    selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
-    st.json(datasets_info[selected_dataset], expanded=True)
+    if len(datasets_info) > 0:
+        selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
+        st.json(datasets_info[selected_dataset], expanded=True)
@@ -16,7 +16,8 @@ def eval_tasks():
        d.identifier: d.to_dict() for d in llama_stack_api.client.eval_tasks.list()
    }

-    selected_eval_task = st.selectbox(
-        "Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect"
-    )
-    st.json(eval_tasks_info[selected_eval_task], expanded=True)
+    if len(eval_tasks_info) > 0:
+        selected_eval_task = st.selectbox(
+            "Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect"
+        )
+        st.json(eval_tasks_info[selected_eval_task], expanded=True)
@@ -10,11 +10,17 @@ from modules.api import llama_stack_api

def providers():
    st.header("🔍 API Providers")
-    apis_providers_info = llama_stack_api.client.providers.list()
-    # selected_api = st.selectbox("Select an API", list(apis_providers_info.keys()))
-    for api in apis_providers_info.keys():
+    apis_providers_lst = llama_stack_api.client.providers.list()
+    api_to_providers = {}
+    for api_provider in apis_providers_lst:
+        if api_provider.api in api_to_providers:
+            api_to_providers[api_provider.api].append(api_provider)
+        else:
+            api_to_providers[api_provider.api] = [api_provider]
+
+    for api in api_to_providers.keys():
        st.markdown(f"###### {api}")
-        st.dataframe([p.to_dict() for p in apis_providers_info[api]], width=500)
+        st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500)


providers()
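Note (illustration, not part of the diff): the grouping loop above could be written more compactly with collections.defaultdict. The snippet below only demonstrates the same grouping over stand-in data; the SimpleNamespace objects are placeholders for whatever llama_stack_api.client.providers.list() actually returns.

    from collections import defaultdict
    from types import SimpleNamespace

    # Stand-ins for the listed provider objects, which expose an "api" attribute.
    apis_providers_lst = [
        SimpleNamespace(api="inference", provider_id="ollama"),
        SimpleNamespace(api="memory", provider_id="faiss"),
    ]

    api_to_providers = defaultdict(list)
    for api_provider in apis_providers_lst:
        api_to_providers[api_provider.api].append(api_provider)
    print(dict(api_to_providers))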
@@ -121,7 +121,7 @@ if prompt := st.chat_input("Example: What is Llama Stack?"):
        if stream:
            for chunk in response:
                if chunk.event.event_type == "progress":
-                    full_response += chunk.event.delta
+                    full_response += chunk.event.delta.text
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
        else:
@@ -44,14 +44,21 @@ def rag_chat_page():
        ]

        providers = llama_stack_api.client.providers.list()
+        memory_provider = None
+        for x in providers:
+            if x.api == "memory":
+                memory_provider = x.provider_id
+
        llama_stack_api.client.memory_banks.register(
            memory_bank_id=memory_bank_name,  # Use the user-provided name
            params={
                "memory_bank_type": "vector",
                "embedding_model": "all-MiniLM-L6-v2",
                "chunk_size_in_tokens": 512,
                "overlap_size_in_tokens": 64,
            },
-            provider_id=providers["memory"][0].provider_id,
+            provider_id=memory_provider,
        )

        # insert documents using the custom bank name

@@ -69,9 +76,6 @@ def rag_chat_page():
        "Select Memory Banks",
        memory_banks,
    )
-    memory_bank_configs = [
-        {"bank_id": bank_id, "type": "vector"} for bank_id in selected_memory_banks
-    ]

    available_models = llama_stack_api.client.models.list()
    available_models = [

@@ -133,14 +137,13 @@ def rag_chat_page():
        sampling_params={
            "strategy": strategy,
        },
-        tools=[
-            {
-                "type": "memory",
-                "memory_bank_configs": memory_bank_configs,
-                "query_generator_config": {"type": "default", "sep": " "},
-                "max_tokens_in_context": 4096,
-                "max_chunks": 10,
-            }
+        toolgroups=[
+            dict(
+                name="builtin::memory",
+                args={
+                    "memory_bank_ids": [bank_id for bank_id in selected_memory_banks],
+                },
+            )
        ],
        tool_choice="auto",
        tool_prompt_format="json",

@@ -179,7 +182,7 @@ def rag_chat_page():
        retrieval_response = ""
        for log in EventLogger().log(response):
            log.print()
-            if log.role == "memory_retrieval":
+            if log.role == "tool_execution":
                retrieval_response += log.content.replace("====", "").strip()
                retrieval_message_placeholder.info(retrieval_response)
            else:
@@ -96,6 +96,9 @@ class ProviderSpec(BaseModel):
        default_factory=list,
        description="Higher-level API surfaces may depend on other providers to provide their functionality",
    )
+    optional_api_dependencies: List[Api] = Field(
+        default_factory=list,
+    )
    deprecation_warning: Optional[str] = Field(
        default=None,
        description="If this provider is deprecated, specify the warning message here",

@@ -147,11 +150,11 @@ class InlineProviderSpec(ProviderSpec):
        default_factory=list,
        description="The pip dependencies needed for this implementation",
    )
-    docker_image: Optional[str] = Field(
+    container_image: Optional[str] = Field(
        default=None,
        description="""
-The docker image to use for this implementation. If one is provided, pip_packages will be ignored.
-If a provider depends on other providers, the dependencies MUST NOT specify a docker image.
+The container image to use for this implementation. If one is provided, pip_packages will be ignored.
+If a provider depends on other providers, the dependencies MUST NOT specify a container image.
""",
    )
    module: str = Field(

@@ -194,7 +197,7 @@ API responses, specify the adapter here.
    )

    @property
-    def docker_image(self) -> Optional[str]:
+    def container_image(self) -> Optional[str]:
        return None

    @property
@@ -72,7 +72,7 @@ def is_tracing_enabled(tracer):
class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
    def __init__(self, config: TelemetryConfig, deps: Dict[str, Any]) -> None:
        self.config = config
-        self.datasetio_api = deps[Api.datasetio]
+        self.datasetio_api = deps.get(Api.datasetio)

        resource = Resource.create(
            {
@@ -24,7 +24,7 @@ def available_providers() -> List[ProviderSpec]:
                "opentelemetry-sdk",
                "opentelemetry-exporter-otlp-proto-http",
            ],
-            api_dependencies=[Api.datasetio],
+            optional_api_dependencies=[Api.datasetio],
            module="llama_stack.providers.inline.telemetry.meta_reference",
            config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
        ),
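Note (illustration, not part of the diff): taken together, the ProviderSpec, resolver, telemetry adapter, and registry hunks introduce an optional-dependency pattern. The sketch below restates it with simplified names and plain strings in place of the Api enum; it is an assumption-laden illustration, not the stack's implementation.

    # Simplified sketch of the optional-dependency pattern.
    from typing import Any, Dict, List, Optional

    def build_deps(required: List[str], optional: List[str], impls: Dict[str, Any]) -> Dict[str, Any]:
        deps = {a: impls[a] for a in required}  # a missing required dep raises KeyError
        for a in optional:
            if a in impls:                      # optional deps are injected only when configured
                deps[a] = impls[a]
        return deps

    class TelemetryLikeAdapter:
        def __init__(self, deps: Dict[str, Any]) -> None:
            self.datasetio_api: Optional[Any] = deps.get("datasetio")

        def save_spans_to_dataset(self) -> None:
            if self.datasetio_api is None:
                raise RuntimeError("DatasetIO API not available")

    adapter = TelemetryLikeAdapter(build_deps([], ["datasetio"], {"inference": object()}))
    print(adapter.datasetio_api)  # None: the optional API was not configured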
@@ -176,7 +176,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
        media_present = request_has_media(request)
        if isinstance(request, ChatCompletionRequest):
            if media_present:
                # vllm does not seem to work well with image urls, so we download the images
                input_dict["messages"] = [
                    await convert_message_to_openai_dict(m, download=True)
                    for m in request.messages
@@ -6,15 +6,15 @@

from typing import AsyncGenerator, Dict, List, Optional

-from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.datatypes import (
+from llama_models.datatypes import (
    GreedySamplingStrategy,
    SamplingParams,
    StopReason,
    TopKSamplingStrategy,
    TopPSamplingStrategy,
)

+from llama_models.llama3.api.chat_format import ChatFormat
+from llama_models.llama3.api.datatypes import StopReason
from pydantic import BaseModel

from llama_stack.apis.common.content_types import (
@@ -188,7 +188,7 @@ async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]:
async def convert_image_content_to_url(
    media: ImageContentItem, download: bool = False, include_format: bool = True
) -> str:
-    if media.url and not download:
+    if media.url and (not download or media.url.uri.startswith("data")):
        return media.url.uri

    content, format = await localize_image_content(media)
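Note (illustration, not part of the diff): the extra condition keeps inline data URIs from being fetched as if they were remote URLs. A tiny standalone rendering of just that check is shown below; the helper name is made up for the example.

    # Simplified stand-in for the URL check in convert_image_content_to_url().
    def should_return_uri_directly(uri: str, download: bool) -> bool:
        return not download or uri.startswith("data")

    print(should_return_uri_directly("data:image/png;base64,AAAA", download=True))   # True
    print(should_return_uri_directly("https://example.com/cat.png", download=True))  # False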
@@ -22,6 +22,9 @@ class TelemetryDatasetMixin:
        dataset_id: str,
        max_depth: Optional[int] = None,
    ) -> None:
+        if self.datasetio_api is None:
+            raise RuntimeError("DatasetIO API not available")
+
        spans = await self.query_spans(
            attribute_filters=attribute_filters,
            attributes_to_return=attributes_to_save,
@@ -30,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "bedrock"

@@ -70,7 +71,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use AWS Bedrock for running LLM inference and safety",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=default_models,

@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -81,6 +81,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
@@ -92,7 +92,7 @@ def get_distribution_template() -> DistributionTemplate:
        name="cerebras",
        distro_type="self_hosted",
        description="Use Cerebras for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=default_models,
@@ -2,7 +2,7 @@ version: '2'
name: experimental-post-training
distribution_spec:
  description: Experimental template for post training
-  docker_image: null
+  container_image: null
  providers:
    inference:
    - inline::meta-reference

@@ -1,6 +1,6 @@
version: '2'
image_name: experimental-post-training
-docker_image: null
+container_image: null
conda_env: experimental-post-training
apis:
- agents
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -39,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }

@@ -98,7 +99,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use Fireworks.AI for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=default_models,

@@ -92,6 +92,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db

@@ -86,6 +86,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -34,6 +34,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "hf-endpoint"

@@ -88,7 +89,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=None,
        providers=providers,
        default_models=[inference_model, safety_model],

@@ -91,6 +91,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db

@@ -86,6 +86,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -34,6 +34,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }

@@ -89,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=None,
        providers=providers,
        default_models=[inference_model, safety_model],

@@ -91,6 +91,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db

@@ -86,6 +86,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -38,6 +38,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "meta-reference-gpu"

@@ -93,6 +93,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db

@@ -87,6 +87,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -33,6 +33,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    default_tool_groups = [

@@ -89,6 +89,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db
@@ -26,4 +26,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -29,6 +29,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }

@@ -68,7 +69,7 @@ def get_distribution_template() -> DistributionTemplate:
        name="nvidia",
        distro_type="remote_hosted",
        description="Use NVIDIA NIM for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=default_models,

@@ -83,6 +83,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
@@ -90,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use (an external) Ollama server for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=[inference_model, safety_model],
@@ -12,6 +12,15 @@ distribution_spec:
  - inline::llama-guard
  agents:
  - inline::meta-reference
+  eval:
+  - inline::meta-reference
+  datasetio:
+  - remote::huggingface
+  - inline::localfs
+  scoring:
+  - inline::basic
+  - inline::llm-as-judge
+  - inline::braintrust
  telemetry:
  - inline::meta-reference
  tool_runtime:

@@ -19,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -2,9 +2,12 @@ version: '2'
image_name: remote-vllm
apis:
- agents
+- datasetio
+- eval
- inference
- memory
- safety
+- scoring
- telemetry
- tool_runtime
providers:

@@ -44,6 +47,28 @@ providers:
      type: sqlite
      namespace: null
      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config: {}
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config: {}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -68,6 +93,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db

@@ -2,9 +2,12 @@ version: '2'
image_name: remote-vllm
apis:
- agents
+- datasetio
+- eval
- inference
- memory
- safety
+- scoring
- telemetry
- tool_runtime
providers:

@@ -38,6 +41,28 @@ providers:
      type: sqlite
      namespace: null
      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config: {}
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config: {}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -62,6 +87,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
@@ -27,12 +27,16 @@ def get_distribution_template() -> DistributionTemplate:
        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "telemetry": ["inline::meta-reference"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "remote-vllm"
@@ -37,7 +37,7 @@ class RunConfigSettings(BaseModel):
        self,
        name: str,
        providers: Dict[str, List[str]],
-        docker_image: Optional[str] = None,
+        container_image: Optional[str] = None,
    ) -> StackRunConfig:
        provider_registry = get_provider_registry()

@@ -83,7 +83,7 @@ class RunConfigSettings(BaseModel):

        return StackRunConfig(
            image_name=name,
-            docker_image=docker_image,
+            container_image=container_image,
            apis=apis,
            providers=provider_configs,
            metadata_store=SqliteKVStoreConfig.sample_run_config(

@@ -112,7 +112,7 @@ class DistributionTemplate(BaseModel):

    # Optional configuration
    run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
-    docker_image: Optional[str] = None
+    container_image: Optional[str] = None

    default_models: Optional[List[ModelInput]] = None

@@ -121,7 +121,7 @@ class DistributionTemplate(BaseModel):
            name=self.name,
            distribution_spec=DistributionSpec(
                description=self.description,
-                docker_image=self.docker_image,
+                container_image=self.container_image,
                providers=self.providers,
            ),
            image_type="conda",  # default to conda, can be overridden

@@ -169,7 +169,7 @@ class DistributionTemplate(BaseModel):

        for yaml_pth, settings in self.run_configs.items():
            run_config = settings.run_config(
-                self.name, self.providers, self.docker_image
+                self.name, self.providers, self.container_image
            )
            with open(yaml_output_dir / yaml_pth, "w") as f:
                yaml.safe_dump(
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -86,6 +86,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db

@@ -85,6 +85,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db

@@ -36,6 +36,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "tgi"

@@ -92,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use (an external) TGI server for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=[inference_model, safety_model],
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -92,6 +92,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db

@@ -86,6 +86,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db

@@ -39,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "together"

@@ -96,7 +97,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use Together.AI for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=default_models,
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -89,6 +89,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db

@@ -33,6 +33,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }

@@ -84,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use a built-in vLLM engine for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=None,
        providers=providers,
        default_models=[inference_model],