Mirror of https://github.com/meta-llama/llama-stack.git (synced 2026-01-07 09:49:56 +00:00)

Add Clarifai as Inference Provider

parent 2a24eb7f53, commit e2cc93c017
16 changed files with 1039 additions and 277 deletions
@@ -101,6 +101,38 @@
     "sentence-transformers --no-deps",
     "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ],
+  "clarifai": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "clarifai",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn"
+  ],
   "dell": [
     "aiohttp",
     "aiosqlite",
docs/source/distributions/remote_hosted_distro/clarifai.md (new file, 77 lines)
@@ -0,0 +1,77 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Clarifai Distribution

```{toctree}
:maxdepth: 2
:hidden:

self
```

The `llamastack/distribution-clarifai` distribution consists of the following provider configurations.

| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::clarifai` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss` |

### Environment Variables

The following environment variables can be configured:

- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `CLARIFAI_PAT`: Clarifai PAT (default: ``)

### Models

The following models are available by default:

- `meta/Llama-3/Llama-3-8B-Instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
- `meta/Llama-3/llama-3-70B-Instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
- `meta/Llama-3/llama-3_1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta/Llama-3/llama-3_2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta/Llama-3/llama-3_3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`

### Prerequisite: PAT

Make sure you have access to a Clarifai Personal Access Token (PAT). You can get one by visiting [Clarifai](https://www.clarifai.com/).

## Running Llama Stack with Clarifai

You can do this via Conda (build the code) or Docker, which has a pre-built image.

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-clarifai \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```

### Via Conda

```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```
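Once the server is running via either method, requests can be sent from any Llama Stack client. Below is a minimal sketch using the `llama-stack-client` Python package; the base URL matches the port used above and the model alias is one of the defaults listed in this file, but the client package itself is not part of this commit, so treat the exact call shape as an assumption:

```python
# Minimal sketch: querying the Clarifai-backed distribution from Python.
# Assumes the server started above is listening on localhost:5001 and that
# the llama-stack-client package is installed (not shipped by this commit).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # one of the default aliases above
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.completion_message.content)
```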
docs/source/distributions/self_hosted_distro/clarifai.md (new file, 77 lines)
@@ -0,0 +1,77 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Clarifai Distribution

```{toctree}
:maxdepth: 2
:hidden:

self
```

The `llamastack/distribution-clarifai` distribution consists of the following provider configurations.

| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::clarifai` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss` |

### Environment Variables

The following environment variables can be configured:

- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `CLARIFAI_PAT`: Clarifai PAT (default: ``)

### Models

The following models are available by default:

- `meta/Llama-3/Llama-3-8B-Instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
- `meta/Llama-3/llama-3-70B-Instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
- `meta/Llama-3/llama-3_1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta/Llama-3/llama-3_2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta/Llama-3/llama-3_3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`

### Prerequisite: PAT

Make sure you have access to a Clarifai Personal Access Token (PAT). You can get one by visiting [Clarifai](https://www.clarifai.com/).

## Running Llama Stack with Clarifai

You can do this via Conda (build the code) or Docker, which has a pre-built image.

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-clarifai \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```

### Via Conda

```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```
@@ -1,260 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import AsyncGenerator, List, Optional

from clarifai import client

from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.datatypes import Message, StopReason
from llama_models.llama3.api.tokenizer import Tokenizer

from llama_stack.apis.inference import *  # noqa: F403
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.utils.inference.augment_messages import (
    augment_messages_for_tools,
)
from llama_stack.providers.utils.inference.routable import RoutableProviderForModels

from .config import ClarifaiImplConfig


CLARIFAI_SUPPORTED_MODELS = {
    "Llama3.1-8B-Instruct": "meta/Llama-3/llama-3_1-8b-instruct",
    "Llama3.1-70B-Instruct": "meta/Llama-3/llama-3-70B-Instruct",
    "Llama3.2-3B-Instruct": "meta/Llama-3/llama-3_2-3b-instruct",
}


class ClarifaiInferenceAdapter(
    Inference, NeedsRequestProviderData, RoutableProviderForModels
):
    def __init__(self, config: ClarifaiImplConfig) -> None:
        RoutableProviderForModels.__init__(
            self, stack_to_provider_models_map=CLARIFAI_SUPPORTED_MODELS
        )
        self.config = config
        tokenizer = Tokenizer.get_instance()
        self.formatter = ChatFormat(tokenizer)

    @property
    def client(self) -> client:
        return client

    async def initialize(self) -> None:
        return

    async def shutdown(self) -> None:
        pass

    async def completion(
        self,
        model: str,
        content: InterleavedTextMedia,
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        raise NotImplementedError()

    def _messages_to_clarifai_messages(self, messages: list[Message]) -> bytes:
        clarifai_messages = ""
        for message in messages:
            if message.role == "ipython":
                role = "tool"
            else:
                role = message.role
            clarifai_messages += (
                f"{{'role': '{role}', 'content': '{message.content}'}}\n"
            )

        return clarifai_messages.encode()

    def get_clarifai_chat_options(self, request: ChatCompletionRequest) -> dict:
        options = {}
        if request.sampling_params is not None:
            for attr in {"temperature", "top_p", "top_k", "max_tokens"}:
                if getattr(request.sampling_params, attr):
                    options[attr] = getattr(request.sampling_params, attr)

        return options

    def resolve_clarifai_model(self, model_name: str) -> str:
        model = self.map_to_provider_model(model_name)
        assert (
            model is not None and model in CLARIFAI_SUPPORTED_MODELS.values()
        ), f"Unsupported model: {model_name}, use one of the supported models: {','.join(CLARIFAI_SUPPORTED_MODELS.keys())}"
        user_id, app_id, model_id = model.split("/")
        return f"https://clarifai.com/{user_id}/{app_id}/models/{model_id}"

    async def chat_completion(
        self,
        model: str,
        messages: List[Message],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        tools: Optional[List[ToolDefinition]] = None,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        request = ChatCompletionRequest(
            model=model,
            messages=messages,
            sampling_params=sampling_params,
            tools=tools or [],
            tool_choice=tool_choice,
            tool_prompt_format=tool_prompt_format,
            stream=stream,
            logprobs=logprobs,
        )

        # accumulate sampling params and other options to pass to clarifai
        options = self.get_clarifai_chat_options(request)
        clarifai_model = self.resolve_clarifai_model(request.model)
        messages = augment_messages_for_tools(request)

        if not request.stream:
            try:
                r = client.app.Model(
                    url=clarifai_model, pat=self.config.PAT
                ).predict_by_bytes(
                    self._messages_to_clarifai_messages(messages),
                    input_type="text",
                    inference_params=options,
                )
            except AssertionError as e:
                if "CLARIFAI_PAT" in str(e):
                    raise ValueError("Please provide a valid PAT for Clarifai")
                else:
                    raise e
            # TODO : Add stop reason to the response, currently not supported by clarifai.
            stop_reason = StopReason.end_of_turn
            completion_message = self.formatter.decode_assistant_message_from_content(
                r.outputs[0].data.text.raw, stop_reason
            )
            yield ChatCompletionResponse(
                completion_message=completion_message,
                logprobs=None,
            )
        else:
            yield ChatCompletionResponseStreamChunk(
                event=ChatCompletionResponseEvent(
                    event_type=ChatCompletionResponseEventType.start,
                    delta="",
                )
            )

            buffer = ""
            ipython = False
            stop_reason = StopReason.end_of_turn
            # TODO: Add support for stream, currently not supported by clarifai. But mocked for now.
            try:
                chunks = [
                    client.app.Model(url=clarifai_model, pat=self.config.PAT)
                    .predict_by_bytes(
                        self._messages_to_clarifai_messages(messages),
                        input_type="text",
                        inference_params=options,
                    )
                    .outputs[0]
                    .data.text.raw
                ]
            except AssertionError as e:
                if "CLARIFAI_PAT" in str(e):
                    raise ValueError("Please provide a valid PAT for Clarifai")
                else:
                    raise e
            for chunk in chunks:
                text = chunk

                if text is None:
                    continue

                # check if its a tool call ( aka starts with <|python_tag|> )
                if not ipython and text.startswith("<|python_tag|>"):
                    ipython = True
                    yield ChatCompletionResponseStreamChunk(
                        event=ChatCompletionResponseEvent(
                            event_type=ChatCompletionResponseEventType.progress,
                            delta=ToolCallDelta(
                                content="",
                                parse_status=ToolCallParseStatus.started,
                            ),
                        )
                    )
                    buffer += text
                    continue

                if ipython:
                    if text == "<|eot_id|>":
                        stop_reason = StopReason.end_of_turn
                        text = ""
                        continue
                    elif text == "<|eom_id|>":
                        stop_reason = StopReason.end_of_message
                        text = ""
                        continue

                    buffer += text
                    delta = ToolCallDelta(
                        content=text,
                        parse_status=ToolCallParseStatus.in_progress,
                    )

                    yield ChatCompletionResponseStreamChunk(
                        event=ChatCompletionResponseEvent(
                            event_type=ChatCompletionResponseEventType.progress,
                            delta=delta,
                            stop_reason=stop_reason,
                        )
                    )
                else:
                    buffer += text
                    yield ChatCompletionResponseStreamChunk(
                        event=ChatCompletionResponseEvent(
                            event_type=ChatCompletionResponseEventType.progress,
                            delta=text,
                            stop_reason=stop_reason,
                        )
                    )

            # parse tool calls and report errors
            message = self.formatter.decode_assistant_message_from_content(
                buffer, stop_reason
            )
            parsed_tool_calls = len(message.tool_calls) > 0
            if ipython and not parsed_tool_calls:
                yield ChatCompletionResponseStreamChunk(
                    event=ChatCompletionResponseEvent(
                        event_type=ChatCompletionResponseEventType.progress,
                        delta=ToolCallDelta(
                            content="",
                            parse_status=ToolCallParseStatus.failure,
                        ),
                        stop_reason=stop_reason,
                    )
                )

            for tool_call in message.tool_calls:
                yield ChatCompletionResponseStreamChunk(
                    event=ChatCompletionResponseEvent(
                        event_type=ChatCompletionResponseEventType.progress,
                        delta=ToolCallDelta(
                            content=tool_call,
                            parse_status=ToolCallParseStatus.success,
                        ),
                        stop_reason=stop_reason,
                    )
                )

            yield ChatCompletionResponseStreamChunk(
                event=ChatCompletionResponseEvent(
                    event_type=ChatCompletionResponseEventType.complete,
                    delta="",
                    stop_reason=stop_reason,
                )
            )
@@ -184,8 +184,8 @@ def available_providers() -> List[ProviderSpec]:
             pip_packages=[
                 "clarifai",
             ],
-            module="llama_stack.providers.adapters.inference.clarifai",
-            config_class="llama_stack.providers.adapters.inference.clarifai.ClarifaiImplConfig",
+            module="llama_stack.providers.remote.inference.clarifai",
+            config_class="llama_stack.providers.remote.inference.clarifai.ClarifaiImplConfig",
         ),
     ),
     remote_provider_spec(
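Note that this hunk only swaps the two module-path lines from the old `adapters` package to the new `remote` package. For orientation, the enclosing registry entry presumably looks like the sketch below; the field names follow `AdapterSpec` in `llama_stack.providers.datatypes`, and everything except the two edited lines is an assumption rather than something shown in the diff:

```python
# Sketch of the surrounding registry entry; only the module/config_class lines
# are confirmed by the hunk above, the rest is assumed from the registry's shape.
from llama_stack.providers.datatypes import AdapterSpec, Api, remote_provider_spec

clarifai_spec = remote_provider_spec(
    api=Api.inference,
    adapter=AdapterSpec(
        adapter_type="clarifai",
        pip_packages=["clarifai"],
        module="llama_stack.providers.remote.inference.clarifai",
        config_class="llama_stack.providers.remote.inference.clarifai.ClarifaiImplConfig",
    ),
)
```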
@@ -9,9 +9,7 @@ from .config import ClarifaiImplConfig


 async def get_adapter_impl(config: ClarifaiImplConfig, _deps):
-    assert isinstance(
-        config, ClarifaiImplConfig
-    ), f"Unexpected config type: {type(config)}"
+    assert isinstance(config, ClarifaiImplConfig), f"Unexpected config type: {type(config)}"
     impl = ClarifaiInferenceAdapter(config)
     await impl.initialize()
     return impl
llama_stack/providers/remote/inference/clarifai/clarifai.py (new file, 204 lines)
@@ -0,0 +1,204 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import AsyncGenerator, List, Optional, Union

from clarifai import client

from llama_stack import logcat
from llama_stack.apis.common.content_types import (
    InterleavedContent,
    InterleavedContentItem,
)
from llama_stack.apis.inference import (
    ChatCompletionRequest,
    ChatCompletionResponse,
    CompletionRequest,
    EmbeddingsResponse,
    EmbeddingTaskType,
    Inference,
    LogProbConfig,
    Message,
    ResponseFormat,
    ResponseFormatType,
    SamplingParams,
    TextTruncation,
    ToolChoice,
    ToolConfig,
    ToolDefinition,
    ToolPromptFormat,
)
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.utils.inference.model_registry import (
    ModelRegistryHelper,
)
from llama_stack.providers.utils.inference.openai_compat import (
    get_sampling_options,
    process_chat_completion_response,
    process_chat_completion_stream_response,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
    chat_completion_request_to_prompt,
    request_has_media,
)

from .config import ClarifaiImplConfig
from .models import MODEL_ENTRIES


class ClarifaiInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
    def __init__(self, config: ClarifaiImplConfig) -> None:
        ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
        self.config = config

    async def initialize(self) -> None:
        return

    async def shutdown(self) -> None:
        pass

    def _get_client(self) -> client:
        return client

    async def completion(
        self,
        model_id: str,
        content: InterleavedContent,
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        raise NotImplementedError()

    def resolve_clarifai_model(self, model_name: str) -> str:
        # model = self.get_llama_model(model_name)
        # assert (
        #     model is not None and model in CLARIFAI_SUPPORTED_MODELS.values()
        # ), f"Unsupported model: {model_name}, use one of the supported models: {','.join(CLARIFAI_SUPPORTED_MODELS.keys())}"
        user_id, app_id, model_id = model_name.split("/")
        return f"https://clarifai.com/{user_id}/{app_id}/models/{model_id}"

    # async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse:
    #     params = await self._get_params(request)
    #     model_url = self.resolve_clarifai_model(request.model)
    #     r = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
    #     return process_completion_response(r)

    # async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
    #     params = await self._get_params(request)
    #     model_url = self.resolve_clarifai_model(request.model)

    #     async def _to_async_generator():
    #         s = self._get_client().app.Model(url=model_url, pat=self.config.PAT).stream_by_bytes(**params)
    #         for chunk in s:
    #             yield chunk

    #     stream = _to_async_generator()
    #     async for chunk in process_completion_stream_response(stream):
    #         yield chunk

    def _build_options(
        self,
        sampling_params: Optional[SamplingParams],
        logprobs: Optional[LogProbConfig],
        fmt: ResponseFormat,
    ) -> dict:
        options = get_sampling_options(sampling_params)
        if fmt:
            if fmt.type == ResponseFormatType.json_schema.value:
                options["response_format"] = {
                    "type": "json_object",
                    "schema": fmt.json_schema,
                }
            elif fmt.type == ResponseFormatType.grammar.value:
                raise NotImplementedError("Grammar response format not supported yet")
            else:
                raise ValueError(f"Unknown response format {fmt.type}")

        if logprobs and logprobs.top_k:
            if logprobs.top_k != 1:
                raise ValueError(
                    f"Unsupported value: Clarifai only supports logprobs top_k=1. {logprobs.top_k} was provided",
                )
            options["logprobs"] = 1

        return options

    async def chat_completion(
        self,
        model_id: str,
        messages: List[Message],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        tools: Optional[List[ToolDefinition]] = None,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = None,
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
        tool_config: Optional[ToolConfig] = None,
    ) -> AsyncGenerator:
        model = await self.model_store.get_model(model_id)
        request = ChatCompletionRequest(
            model=model.provider_resource_id,
            messages=messages,
            sampling_params=sampling_params,
            tools=tools or [],
            response_format=response_format,
            stream=stream,
            logprobs=logprobs,
            tool_config=tool_config,
        )

        if stream:
            return self._stream_chat_completion(request)
        else:
            return await self._nonstream_chat_completion(request)

    async def _nonstream_chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
        params = await self._get_params(request)
        model_url = self.resolve_clarifai_model(request.model)
        r = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
        return process_chat_completion_response(r)

    async def _stream_chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator:
        params = await self._get_params(request)
        model_url = self.resolve_clarifai_model(request.model)

        async def _to_async_generator():
            s = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
            for chunk in s:
                yield chunk

        stream = _to_async_generator()
        async for chunk in process_chat_completion_stream_response(stream):
            yield chunk

    async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict:
        input_dict = {}
        media_present = request_has_media(request)
        llama_model = self.get_llama_model(request.model)
        if isinstance(request, ChatCompletionRequest):
            assert not media_present, "Clarifai does not support media for ChatCompletion requests"
            input_dict["input_bytes"] = (await chat_completion_request_to_prompt(request, llama_model)).encode()

        params = {
            **input_dict,
            "input_type": "text",
            "inference_params": self._build_options(request.sampling_params, request.logprobs, request.response_format),
        }
        logcat.debug("inference", f"params to clarifai: {params}")
        return params

    async def embeddings(
        self,
        model_id: str,
        contents: List[str] | List[InterleavedContentItem],
        text_truncation: Optional[TextTruncation] = TextTruncation.none,
        output_dimension: Optional[int] = None,
        task_type: Optional[EmbeddingTaskType] = None,
    ) -> EmbeddingsResponse:
        raise NotImplementedError()
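Stripped of the Llama Stack plumbing, the adapter above funnels every request into a single Clarifai SDK call. The sketch below mirrors `_nonstream_chat_completion` and `_get_params` from the file above; the call chain is lifted verbatim from that code, while the PAT, prompt bytes, and sampling values are placeholders:

```python
# Sketch of the raw Clarifai call the adapter wraps. The
# client.app.Model(...).predict_by_bytes(...) chain is taken directly from the
# adapter code above; the PAT, prompt, and inference_params here are placeholders.
from clarifai import client

model = client.app.Model(
    url="https://clarifai.com/meta/Llama-3/models/llama-3_1-8b-instruct",
    pat="<CLARIFAI_PAT>",
)
result = model.predict_by_bytes(
    b"<formatted Llama prompt bytes>",  # built by _get_params() via chat_completion_request_to_prompt
    input_type="text",
    inference_params={"temperature": 0.7, "max_tokens": 256},
)
# Raw completion text; the deleted adapter read this field directly, the new one
# hands the whole response to process_chat_completion_response().
print(result.outputs[0].data.text.raw)
```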
@@ -4,11 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Optional
+from typing import Any, Dict

-from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field

+from llama_stack.schema_utils import json_schema_type
+

 @json_schema_type
 class ClarifaiImplConfig(BaseModel):
@@ -16,3 +17,9 @@ class ClarifaiImplConfig(BaseModel):
         default=None,
         description="The Clarifai Personal Access Token (PAT) to use for authentication.",
     )
+
+    @classmethod
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
+        return {
+            "PAT": "${env.CLARIFAI_PAT}",
+        }
llama_stack/providers/remote/inference/clarifai/models.py (new file, 33 lines)
@@ -0,0 +1,33 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
    build_hf_repo_model_entry,
)

MODEL_ENTRIES = [
    build_hf_repo_model_entry(
        "meta/Llama-3/Llama-3-8B-Instruct",
        CoreModelId.llama3_8b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3-70B-Instruct",
        CoreModelId.llama3_70b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3_1-8b-instruct",
        CoreModelId.llama3_1_8b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3_2-3b-instruct",
        CoreModelId.llama3_2_3b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3_3-70b-instruct",
        CoreModelId.llama3_3_70b_instruct.value,
    ),
]
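These entries map Clarifai's native model IDs onto the stack's Hugging Face-style aliases, which is why both spellings appear in the generated run.yaml files below. A short sketch of how the adapter's `ModelRegistryHelper` base class consumes them; `get_provider_model_id` is assumed from how the helper is used elsewhere in the codebase, not from this diff:

```python
# Sketch: alias resolution through ModelRegistryHelper, mirroring how the
# adapter initializes it; get_provider_model_id() is an assumed helper method.
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

from llama_stack.providers.remote.inference.clarifai.models import MODEL_ENTRIES

helper = ModelRegistryHelper(MODEL_ENTRIES)
# The HF-style alias and the Clarifai-native ID should resolve to the same entry:
print(helper.get_provider_model_id("meta-llama/Llama-3.1-8B-Instruct"))
# expected: "meta/Llama-3/llama-3_1-8b-instruct"
```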
llama_stack/templates/clarifai/__init__.py (new file, 7 lines)
@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .clarifai import get_distribution_template  # noqa: F401
llama_stack/templates/clarifai/build.yaml (new file, 30 lines)
@@ -0,0 +1,30 @@
version: '2'
distribution_spec:
  description: Use Clarifai for running LLM inference
  providers:
    inference:
    - remote::clarifai
    vector_io:
    - inline::faiss
    safety:
    - inline::llama-guard
    agents:
    - inline::meta-reference
    telemetry:
    - inline::meta-reference
    eval:
    - inline::meta-reference
    datasetio:
    - remote::huggingface
    - inline::localfs
    scoring:
    - inline::basic
    - inline::llm-as-judge
    - inline::braintrust
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
image_type: conda
llama_stack/templates/clarifai/clarifai.py (new file, 160 lines)
@@ -0,0 +1,160 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from pathlib import Path

from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
    ModelInput,
    Provider,
    ShieldInput,
    ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.clarifai import ClarifaiImplConfig
from llama_stack.providers.remote.inference.clarifai.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


def get_distribution_template() -> DistributionTemplate:
    providers = {
        "inference": ["remote::clarifai"],
        "vector_io": ["inline::faiss"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
    }
    name = "clarifai"
    inference_provider = Provider(
        provider_id="clarifai",
        provider_type="remote::clarifai",
        config=ClarifaiImplConfig.sample_run_config(),
    )
    vector_io_provider = Provider(
        provider_id="faiss",
        provider_type="inline::faiss",
        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
    )
    embedding_provider = Provider(
        provider_id="sentence-transformers",
        provider_type="inline::sentence-transformers",
        config=SentenceTransformersInferenceConfig.sample_run_config(),
    )
    available_models = {
        "clarifai": MODEL_ENTRIES,
    }
    default_models = get_model_registry(available_models)
    default_tool_groups = [
        ToolGroupInput(
            toolgroup_id="builtin::websearch",
            provider_id="tavily-search",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::code_interpreter",
            provider_id="code-interpreter",
        ),
    ]
    embedding_model = ModelInput(
        model_id="all-MiniLM-L6-v2",
        provider_id="sentence-transformers",
        model_type=ModelType.embedding,
        metadata={
            "embedding_dimension": 384,
        },
    )

    return DistributionTemplate(
        name=name,
        distro_type="remote_hosted",
        description="Use Clarifai for running LLM inference",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        available_models_by_provider=available_models,
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider, embedding_provider],
                    "vector_io": [vector_io_provider],
                },
                default_models=default_models + [embedding_model],
                default_tool_groups=default_tool_groups,
                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
            ),
            "run-with-safety.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [
                        inference_provider,
                        embedding_provider,
                    ],
                    "vector_io": [vector_io_provider],
                    "safety": [
                        Provider(
                            provider_id="llama-guard",
                            provider_type="inline::llama-guard",
                            config={},
                        ),
                        Provider(
                            provider_id="llama-guard-vision",
                            provider_type="inline::llama-guard",
                            config={},
                        ),
                        Provider(
                            provider_id="code-scanner",
                            provider_type="inline::code-scanner",
                            config={},
                        ),
                    ],
                },
                default_models=[
                    *default_models,
                    embedding_model,
                ],
                default_shields=[
                    ShieldInput(
                        shield_id="meta-llama/Llama-Guard-3-8B",
                        provider_id="llama-guard",
                    ),
                    ShieldInput(
                        shield_id="meta-llama/Llama-Guard-3-11B-Vision",
                        provider_id="llama-guard-vision",
                    ),
                    ShieldInput(
                        shield_id="CodeScanner",
                        provider_id="code-scanner",
                    ),
                ],
                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
                "5001",
                "Port for the Llama Stack distribution server",
            ),
            "CLARIFAI_PAT": (
                "",
                "Clarifai PAT",
            ),
        },
    )
llama_stack/templates/clarifai/doc_template.md (new file, 68 lines)
@@ -0,0 +1,68 @@
---
orphan: true
---
# Clarifai Distribution

```{toctree}
:maxdepth: 2
:hidden:

self
```

The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.

{{ providers_table }}

{% if run_config_env_vars %}
### Environment Variables

The following environment variables can be configured:

{% for var, (default_value, description) in run_config_env_vars.items() %}
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
{% endfor %}
{% endif %}

{% if default_models %}
### Models

The following models are available by default:

{% for model in default_models %}
- `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %}
{% endif %}

### Prerequisite: PAT

Make sure you have access to a Clarifai Personal Access Token (PAT). You can get one by visiting [Clarifai](https://www.clarifai.com/).

## Running Llama Stack with Clarifai

You can do this via Conda (build the code) or Docker, which has a pre-built image.

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```

### Via Conda

```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```
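Since the two committed docs above are generated from this template, the rendering step is a plain Jinja pass. A minimal illustration with `jinja2` follows; the context values are lifted from `run_config_env_vars` in `clarifai.py` above, and the real `distro_codegen.py` invocation may pass more context than shown:

```python
# Minimal illustration of rendering doc_template.md; the actual distro_codegen.py
# pipeline may differ, this only shows how the {{ name }} placeholders resolve.
from jinja2 import Template

with open("llama_stack/templates/clarifai/doc_template.md") as f:
    template = Template(f.read())

print(template.render(
    name="clarifai",
    providers_table="| API | Provider(s) |\n|-----|-------------|\n| inference | `remote::clarifai` |",
    run_config_env_vars={
        "LLAMA_STACK_PORT": ("5001", "Port for the Llama Stack distribution server"),
        "CLARIFAI_PAT": ("", "Clarifai PAT"),
    },
    default_models=[],  # empty list, so the Models section is skipped in this sketch
))
```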
llama_stack/templates/clarifai/run-with-safety.yaml (new file, 175 lines)
@@ -0,0 +1,175 @@
version: '2'
image_name: clarifai
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: clarifai
    provider_type: remote::clarifai
    config:
      PAT: ${env.CLARIFAI_PAT}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  - provider_id: llama-guard-vision
    provider_type: inline::llama-guard
    config: {}
  - provider_id: code-scanner
    provider_type: inline::code-scanner
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/clarifai/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config: {}
  - provider_id: localfs
    provider_type: inline::localfs
    config: {}
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/registry.db
models:
- metadata: {}
  model_id: meta/Llama-3/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_1-8b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_2-3b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.2-3B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_3-70b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
  provider_id: llama-guard
- shield_id: meta-llama/Llama-Guard-3-11B-Vision
  provider_id: llama-guard-vision
- shield_id: CodeScanner
  provider_id: code-scanner
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
server:
  port: 8321
llama_stack/templates/clarifai/run.yaml (new file, 164 lines)
@@ -0,0 +1,164 @@
version: '2'
image_name: clarifai
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: clarifai
    provider_type: remote::clarifai
    config:
      PAT: ${env.CLARIFAI_PAT}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/clarifai/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config: {}
  - provider_id: localfs
    provider_type: inline::localfs
    config: {}
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/registry.db
models:
- metadata: {}
  model_id: meta/Llama-3/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_1-8b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_2-3b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.2-3B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_3-70b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
server:
  port: 8321
@@ -1,10 +0,0 @@
name: local-clarifai
distribution_spec:
  description: Use Clarifai for running LLM inference
  providers:
    inference: remote::clarifai
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda