Add Clarifai as Inference Provider

This commit is contained in:
Srikanth Bachala 2025-03-07 17:01:32 +05:30
parent 2a24eb7f53
commit e2cc93c017
16 changed files with 1039 additions and 277 deletions


@@ -101,6 +101,38 @@
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
], ],
"clarifai": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"clarifai",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn"
],
"dell": [ "dell": [
"aiohttp", "aiohttp",
"aiosqlite", "aiosqlite",


@@ -0,0 +1,77 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Clarifai Distribution
```{toctree}
:maxdepth: 2
:hidden:
self
```
The `llamastack/distribution-clarifai` distribution consists of the following provider configurations.
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::clarifai` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss` |
### Environment Variables
The following environment variables can be configured:
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `CLARIFAI_PAT`: Clarifai PAT (default: ``)
### Models
The following models are available by default:
- `meta/Llama-3/Llama-3-8B-Instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
- `meta/Llama-3/llama-3-70B-Instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
- `meta/Llama-3/llama-3_1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta/Llama-3/llama-3_2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta/Llama-3/llama-3_3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
### Prerequisite: PAT
Make sure you have access to a Clarifai Personal Access Token (PAT). You can get one by visiting [Clarifai](https://www.clarifai.com/).
## Running Llama Stack with Clarifai
You can do this via Conda (by building the distribution code) or via Docker, which has a pre-built image.
### Via Docker
This method allows you to get started quickly without having to build the distribution code.
```bash
LLAMA_STACK_PORT=5001
docker run \
-it \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-clarifai \
--port $LLAMA_STACK_PORT \
--env CLARIFAI_PAT=$CLARIFAI_PAT
```
### Via Conda
```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
--port $LLAMA_STACK_PORT \
--env CLARIFAI_PAT=$CLARIFAI_PAT
```
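Once the server is running (via either method above), it can be exercised from Python. The snippet below is a minimal sketch, assuming the `llama-stack-client` package is installed and the server is listening on the port chosen above; the model ID is one of the aliases from the Models table.
```python
# Minimal sketch: send a chat completion to the running Clarifai distribution.
# Assumes `pip install llama-stack-client` and a server started as shown above.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # one of the aliases listed above
    messages=[{"role": "user", "content": "Explain what an inference provider is in one sentence."}],
)
print(response.completion_message.content)
```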


@@ -0,0 +1,77 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Clarifai Distribution
```{toctree}
:maxdepth: 2
:hidden:
self
```
The `llamastack/distribution-clarifai` distribution consists of the following provider configurations.
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::clarifai` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss` |
### Environment Variables
The following environment variables can be configured:
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `CLARIFAI_PAT`: Clarifai PAT (default: ``)
### Models
The following models are available by default:
- `meta/Llama-3/Llama-3-8B-Instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
- `meta/Llama-3/llama-3-70B-Instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
- `meta/Llama-3/llama-3_1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta/Llama-3/llama-3_2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta/Llama-3/llama-3_3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
### Prerequisite: PAT
Make sure you have access to a Clarifai Personal Access Token (PAT). You can get one by visiting [Clarifai](https://www.clarifai.com/).
## Running Llama Stack with Clarifai
You can do this via Conda (by building the distribution code) or via Docker, which has a pre-built image.
### Via Docker
This method allows you to get started quickly without having to build the distribution code.
```bash
LLAMA_STACK_PORT=5001
docker run \
-it \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-clarifai \
--port $LLAMA_STACK_PORT \
--env CLARIFAI_PAT=$CLARIFAI_PAT
```
### Via Conda
```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
--port $LLAMA_STACK_PORT \
--env CLARIFAI_PAT=$CLARIFAI_PAT
```


@@ -1,260 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import AsyncGenerator, List, Optional
from clarifai import client
from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.datatypes import Message, StopReason
from llama_models.llama3.api.tokenizer import Tokenizer
from llama_stack.apis.inference import * # noqa: F403
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.utils.inference.augment_messages import (
augment_messages_for_tools,
)
from llama_stack.providers.utils.inference.routable import RoutableProviderForModels
from .config import ClarifaiImplConfig
CLARIFAI_SUPPORTED_MODELS = {
"Llama3.1-8B-Instruct": "meta/Llama-3/llama-3_1-8b-instruct",
"Llama3.1-70B-Instruct": "meta/Llama-3/llama-3-70B-Instruct",
"Llama3.2-3B-Instruct": "meta/Llama-3/llama-3_2-3b-instruct",
}
class ClarifaiInferenceAdapter(
Inference, NeedsRequestProviderData, RoutableProviderForModels
):
def __init__(self, config: ClarifaiImplConfig) -> None:
RoutableProviderForModels.__init__(
self, stack_to_provider_models_map=CLARIFAI_SUPPORTED_MODELS
)
self.config = config
tokenizer = Tokenizer.get_instance()
self.formatter = ChatFormat(tokenizer)
@property
def client(self) -> client:
return client
async def initialize(self) -> None:
return
async def shutdown(self) -> None:
pass
async def completion(
self,
model: str,
content: InterleavedTextMedia,
sampling_params: Optional[SamplingParams] = SamplingParams(),
stream: Optional[bool] = False,
logprobs: Optional[LogProbConfig] = None,
) -> AsyncGenerator:
raise NotImplementedError()
def _messages_to_clarifai_messages(self, messages: list[Message]) -> bytes:
clarifai_messages = ""
for message in messages:
if message.role == "ipython":
role = "tool"
else:
role = message.role
clarifai_messages += (
f"{{'role': '{role}', 'content': '{message.content}'}}\n"
)
return clarifai_messages.encode()
def get_clarifai_chat_options(self, request: ChatCompletionRequest) -> dict:
options = {}
if request.sampling_params is not None:
for attr in {"temperature", "top_p", "top_k", "max_tokens"}:
if getattr(request.sampling_params, attr):
options[attr] = getattr(request.sampling_params, attr)
return options
def resolve_clarifai_model(self, model_name: str) -> str:
model = self.map_to_provider_model(model_name)
assert (
model is not None and model in CLARIFAI_SUPPORTED_MODELS.values()
), f"Unsupported model: {model_name}, use one of the supported models: {','.join(CLARIFAI_SUPPORTED_MODELS.keys())}"
user_id, app_id, model_id = model.split("/")
return f"https://clarifai.com/{user_id}/{app_id}/models/{model_id}"
async def chat_completion(
self,
model: str,
messages: List[Message],
sampling_params: Optional[SamplingParams] = SamplingParams(),
tools: Optional[List[ToolDefinition]] = None,
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
stream: Optional[bool] = False,
logprobs: Optional[LogProbConfig] = None,
) -> AsyncGenerator:
request = ChatCompletionRequest(
model=model,
messages=messages,
sampling_params=sampling_params,
tools=tools or [],
tool_choice=tool_choice,
tool_prompt_format=tool_prompt_format,
stream=stream,
logprobs=logprobs,
)
# accumulate sampling params and other options to pass to clarifai
options = self.get_clarifai_chat_options(request)
clarifai_model = self.resolve_clarifai_model(request.model)
messages = augment_messages_for_tools(request)
if not request.stream:
try:
r = client.app.Model(
url=clarifai_model, pat=self.config.PAT
).predict_by_bytes(
self._messages_to_clarifai_messages(messages),
input_type="text",
inference_params=options,
)
except AssertionError as e:
if "CLARIFAI_PAT" in str(e):
raise ValueError("Please provide a valid PAT for Clarifai")
else:
raise e
# TODO : Add stop reason to the response, currently not supported by clarifai.
stop_reason = StopReason.end_of_turn
completion_message = self.formatter.decode_assistant_message_from_content(
r.outputs[0].data.text.raw, stop_reason
)
yield ChatCompletionResponse(
completion_message=completion_message,
logprobs=None,
)
else:
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.start,
delta="",
)
)
buffer = ""
ipython = False
stop_reason = StopReason.end_of_turn
# TODO: Add support for stream, currently not supported by clarifai. But mocked for now.
try:
chunks = [
client.app.Model(url=clarifai_model, pat=self.config.PAT)
.predict_by_bytes(
self._messages_to_clarifai_messages(messages),
input_type="text",
inference_params=options,
)
.outputs[0]
.data.text.raw
]
except AssertionError as e:
if "CLARIFAI_PAT" in str(e):
raise ValueError("Please provide a valid PAT for Clarifai")
else:
raise e
for chunk in chunks:
text = chunk
if text is None:
continue
# check if its a tool call ( aka starts with <|python_tag|> )
if not ipython and text.startswith("<|python_tag|>"):
ipython = True
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.progress,
delta=ToolCallDelta(
content="",
parse_status=ToolCallParseStatus.started,
),
)
)
buffer += text
continue
if ipython:
if text == "<|eot_id|>":
stop_reason = StopReason.end_of_turn
text = ""
continue
elif text == "<|eom_id|>":
stop_reason = StopReason.end_of_message
text = ""
continue
buffer += text
delta = ToolCallDelta(
content=text,
parse_status=ToolCallParseStatus.in_progress,
)
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.progress,
delta=delta,
stop_reason=stop_reason,
)
)
else:
buffer += text
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.progress,
delta=text,
stop_reason=stop_reason,
)
)
# parse tool calls and report errors
message = self.formatter.decode_assistant_message_from_content(
buffer, stop_reason
)
parsed_tool_calls = len(message.tool_calls) > 0
if ipython and not parsed_tool_calls:
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.progress,
delta=ToolCallDelta(
content="",
parse_status=ToolCallParseStatus.failure,
),
stop_reason=stop_reason,
)
)
for tool_call in message.tool_calls:
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.progress,
delta=ToolCallDelta(
content=tool_call,
parse_status=ToolCallParseStatus.success,
),
stop_reason=stop_reason,
)
)
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.complete,
delta="",
stop_reason=stop_reason,
)
)
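Both this adapter and the rewritten one below resolve a provider model name to a Clarifai model URL by splitting the `user_id/app_id/model_id` triple; a standalone sketch of that mapping, using one of the supported models:
```python
# Standalone sketch of the resolve_clarifai_model logic used by the adapter.
def resolve_clarifai_model(provider_model_id: str) -> str:
    user_id, app_id, model_id = provider_model_id.split("/")
    return f"https://clarifai.com/{user_id}/{app_id}/models/{model_id}"

print(resolve_clarifai_model("meta/Llama-3/llama-3_1-8b-instruct"))
# https://clarifai.com/meta/Llama-3/models/llama-3_1-8b-instruct
```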


@@ -184,8 +184,8 @@ def available_providers() -> List[ProviderSpec]:
pip_packages=[
"clarifai",
],
- module="llama_stack.providers.adapters.inference.clarifai",
- config_class="llama_stack.providers.adapters.inference.clarifai.ClarifaiImplConfig",
+ module="llama_stack.providers.remote.inference.clarifai",
+ config_class="llama_stack.providers.remote.inference.clarifai.ClarifaiImplConfig",
),
),
remote_provider_spec(


@@ -9,9 +9,7 @@ from .config import ClarifaiImplConfig
async def get_adapter_impl(config: ClarifaiImplConfig, _deps):
- assert isinstance(
- config, ClarifaiImplConfig
- ), f"Unexpected config type: {type(config)}"
+ assert isinstance(config, ClarifaiImplConfig), f"Unexpected config type: {type(config)}"
impl = ClarifaiInferenceAdapter(config)
await impl.initialize()
return impl


@@ -0,0 +1,204 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import AsyncGenerator, List, Optional, Union
from clarifai import client
from llama_stack import logcat
from llama_stack.apis.common.content_types import (
InterleavedContent,
InterleavedContentItem,
)
from llama_stack.apis.inference import (
ChatCompletionRequest,
ChatCompletionResponse,
CompletionRequest,
EmbeddingsResponse,
EmbeddingTaskType,
Inference,
LogProbConfig,
Message,
ResponseFormat,
ResponseFormatType,
SamplingParams,
TextTruncation,
ToolChoice,
ToolConfig,
ToolDefinition,
ToolPromptFormat,
)
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
)
from llama_stack.providers.utils.inference.openai_compat import (
get_sampling_options,
process_chat_completion_response,
process_chat_completion_stream_response,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
chat_completion_request_to_prompt,
request_has_media,
)
from .config import ClarifaiImplConfig
from .models import MODEL_ENTRIES
class ClarifaiInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
def __init__(self, config: ClarifaiImplConfig) -> None:
ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
self.config = config
async def initialize(self) -> None:
return
async def shutdown(self) -> None:
pass
def _get_client(self) -> client:
return client
async def completion(
self,
model_id: str,
content: InterleavedContent,
sampling_params: Optional[SamplingParams] = SamplingParams(),
response_format: Optional[ResponseFormat] = None,
stream: Optional[bool] = False,
logprobs: Optional[LogProbConfig] = None,
) -> AsyncGenerator:
raise NotImplementedError()
def resolve_clarifai_model(self, model_name: str) -> str:
# model = self.get_llama_model(model_name)
# assert (
# model is not None and model in CLARIFAI_SUPPORTED_MODELS.values()
# ), f"Unsupported model: {model_name}, use one of the supported models: {','.join(CLARIFAI_SUPPORTED_MODELS.keys())}"
user_id, app_id, model_id = model_name.split("/")
return f"https://clarifai.com/{user_id}/{app_id}/models/{model_id}"
# async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse:
# params = await self._get_params(request)
# model_url = self.resolve_clarifai_model(request.model)
# r = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
# return process_completion_response(r)
# async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
# params = await self._get_params(request)
# model_url = self.resolve_clarifai_model(request.model)
# async def _to_async_generator():
# s = self._get_client().app.Model(url=model_url, pat=self.config.PAT).stream_by_bytes(**params)
# for chunk in s:
# yield chunk
# stream = _to_async_generator()
# async for chunk in process_completion_stream_response(stream):
# yield chunk
def _build_options(
self,
sampling_params: Optional[SamplingParams],
logprobs: Optional[LogProbConfig],
fmt: ResponseFormat,
) -> dict:
options = get_sampling_options(sampling_params)
if fmt:
if fmt.type == ResponseFormatType.json_schema.value:
options["response_format"] = {
"type": "json_object",
"schema": fmt.json_schema,
}
elif fmt.type == ResponseFormatType.grammar.value:
raise NotImplementedError("Grammar response format not supported yet")
else:
raise ValueError(f"Unknown response format {fmt.type}")
if logprobs and logprobs.top_k:
if logprobs.top_k != 1:
raise ValueError(
f"Unsupported value: Clarifai only supports logprobs top_k=1. {logprobs.top_k} was provided",
)
options["logprobs"] = 1
return options
async def chat_completion(
self,
model_id: str,
messages: List[Message],
sampling_params: Optional[SamplingParams] = SamplingParams(),
tools: Optional[List[ToolDefinition]] = None,
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
tool_prompt_format: Optional[ToolPromptFormat] = None,
response_format: Optional[ResponseFormat] = None,
stream: Optional[bool] = False,
logprobs: Optional[LogProbConfig] = None,
tool_config: Optional[ToolConfig] = None,
) -> AsyncGenerator:
model = await self.model_store.get_model(model_id)
request = ChatCompletionRequest(
model=model.provider_resource_id,
messages=messages,
sampling_params=sampling_params,
tools=tools or [],
response_format=response_format,
stream=stream,
logprobs=logprobs,
tool_config=tool_config,
)
if stream:
return self._stream_chat_completion(request)
else:
return await self._nonstream_chat_completion(request)
async def _nonstream_chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
params = await self._get_params(request)
model_url = self.resolve_clarifai_model(request.model)
r = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
return process_chat_completion_response(r)
async def _stream_chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator:
params = await self._get_params(request)
model_url = self.resolve_clarifai_model(request.model)
async def _to_async_generator():
s = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
for chunk in s:
yield chunk
stream = _to_async_generator()
async for chunk in process_chat_completion_stream_response(stream):
yield chunk
async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict:
input_dict = {}
media_present = request_has_media(request)
llama_model = self.get_llama_model(request.model)
if isinstance(request, ChatCompletionRequest):
assert not media_present, "Clarifai does not support media for ChatCompletion requests"
input_dict["input_bytes"] = (await chat_completion_request_to_prompt(request, llama_model)).encode()
params = {
**input_dict,
"input_type": "text",
"inference_params": self._build_options(request.sampling_params, request.logprobs, request.response_format),
}
logcat.debug("inference", f"params to clarifai: {params}")
return params
async def embeddings(
self,
model_id: str,
contents: List[str] | List[InterleavedContentItem],
text_truncation: Optional[TextTruncation] = TextTruncation.none,
output_dimension: Optional[int] = None,
task_type: Optional[EmbeddingTaskType] = None,
) -> EmbeddingsResponse:
raise NotImplementedError()
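Every chat completion ends up as a single `predict_by_bytes` call against the Clarifai SDK. The sketch below isolates that call, mirroring the arguments assembled by `_get_params` and `_build_options`; the prompt bytes and sampling values are illustrative placeholders, and a valid `CLARIFAI_PAT` is assumed to be in the environment.
```python
# Sketch of the Clarifai SDK call made by _nonstream_chat_completion, in isolation.
# Prompt bytes and inference_params are illustrative placeholders.
import os

from clarifai import client

model_url = "https://clarifai.com/meta/Llama-3/models/llama-3_1-8b-instruct"
params = {
    "input_bytes": b"<formatted Llama prompt from chat_completion_request_to_prompt>",
    "input_type": "text",
    "inference_params": {"temperature": 0.7, "max_tokens": 256},
}

r = client.app.Model(url=model_url, pat=os.environ["CLARIFAI_PAT"]).predict_by_bytes(**params)
print(r.outputs[0].data.text.raw)
```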


@@ -4,11 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
- from typing import Optional
+ from typing import Any, Dict
- from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field
+ from llama_stack.schema_utils import json_schema_type
@json_schema_type
class ClarifaiImplConfig(BaseModel):
@@ -16,3 +17,9 @@ class ClarifaiImplConfig(BaseModel):
default=None,
description="The Clarifai Personal Access Token (PAT) to use for authentication.",
)
@classmethod
def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
return {
"PAT": "${env.CLARIFAI_PAT}",
}
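`sample_run_config` is what the distribution template uses to emit the provider's `config:` block; calling it directly (a trivial illustration) shows the placeholder that later appears in `run.yaml` as `PAT: ${env.CLARIFAI_PAT}`.
```python
# Illustration: the sample config that gets rendered into run.yaml for the clarifai provider.
from llama_stack.providers.remote.inference.clarifai import ClarifaiImplConfig

print(ClarifaiImplConfig.sample_run_config())
# {'PAT': '${env.CLARIFAI_PAT}'}
```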


@@ -0,0 +1,33 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_hf_repo_model_entry,
)
MODEL_ENTRIES = [
build_hf_repo_model_entry(
"meta/Llama-3/Llama-3-8B-Instruct",
CoreModelId.llama3_8b_instruct.value,
),
build_hf_repo_model_entry(
"meta/Llama-3/llama-3-70B-Instruct",
CoreModelId.llama3_70b_instruct.value,
),
build_hf_repo_model_entry(
"meta/Llama-3/llama-3_1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_hf_repo_model_entry(
"meta/Llama-3/llama-3_2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_hf_repo_model_entry(
"meta/Llama-3/llama-3_3-70b-instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
]
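Each entry ties a Clarifai provider model ID to the canonical Llama model, which is where the `meta-llama/...` aliases in the generated run configs come from. A rough sketch of the resulting mapping, taken from the `run.yaml` entries later in this commit (the dict itself is illustrative, not part of the code):
```python
# Illustrative provider-model-ID -> alias mapping implied by MODEL_ENTRIES,
# as reflected in the generated run.yaml below.
CLARIFAI_TO_ALIAS = {
    "meta/Llama-3/Llama-3-8B-Instruct": "meta-llama/Llama-3-8B-Instruct",
    "meta/Llama-3/llama-3-70B-Instruct": "meta-llama/Llama-3-70B-Instruct",
    "meta/Llama-3/llama-3_1-8b-instruct": "meta-llama/Llama-3.1-8B-Instruct",
    "meta/Llama-3/llama-3_2-3b-instruct": "meta-llama/Llama-3.2-3B-Instruct",
    "meta/Llama-3/llama-3_3-70b-instruct": "meta-llama/Llama-3.3-70B-Instruct",
}
```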


@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .clarifai import get_distribution_template # noqa: F401


@@ -0,0 +1,30 @@
version: '2'
distribution_spec:
description: Use Clarifai for running LLM inference
providers:
inference:
- remote::clarifai
vector_io:
- inline::faiss
safety:
- inline::llama-guard
agents:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda


@@ -0,0 +1,160 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pathlib import Path
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.clarifai import ClarifaiImplConfig
from llama_stack.providers.remote.inference.clarifai.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
def get_distribution_template() -> DistributionTemplate:
providers = {
"inference": ["remote::clarifai"],
"vector_io": ["inline::faiss"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::rag-runtime",
"remote::model-context-protocol",
],
}
name = "clarifai"
inference_provider = Provider(
provider_id="clarifai",
provider_type="remote::clarifai",
config=ClarifaiImplConfig.sample_run_config(),
)
vector_io_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
available_models = {
"clarifai": MODEL_ENTRIES,
}
default_models = get_model_registry(available_models)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::rag",
provider_id="rag-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id="sentence-transformers",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
return DistributionTemplate(
name=name,
distro_type="remote_hosted",
description="Use Clarifai for running LLM inference",
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
available_models_by_provider=available_models,
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider, embedding_provider],
"vector_io": [vector_io_provider],
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
),
"run-with-safety.yaml": RunConfigSettings(
provider_overrides={
"inference": [
inference_provider,
embedding_provider,
],
"vector_io": [vector_io_provider],
"safety": [
Provider(
provider_id="llama-guard",
provider_type="inline::llama-guard",
config={},
),
Provider(
provider_id="llama-guard-vision",
provider_type="inline::llama-guard",
config={},
),
Provider(
provider_id="code-scanner",
provider_type="inline::code-scanner",
config={},
),
],
},
default_models=[
*default_models,
embedding_model,
],
default_shields=[
ShieldInput(
shield_id="meta-llama/Llama-Guard-3-8B",
provider_id="llama-guard",
),
ShieldInput(
shield_id="meta-llama/Llama-Guard-3-11B-Vision",
provider_id="llama-guard-vision",
),
ShieldInput(
shield_id="CodeScanner",
provider_id="code-scanner",
),
],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"5001",
"Port for the Llama Stack distribution server",
),
"CLARIFAI_PAT": (
"",
"Clarifai PAT",
),
},
)


@@ -0,0 +1,68 @@
---
orphan: true
---
# Clarifai Distribution
```{toctree}
:maxdepth: 2
:hidden:
self
```
The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
{{ providers_table }}
{% if run_config_env_vars %}
### Environment Variables
The following environment variables can be configured:
{% for var, (default_value, description) in run_config_env_vars.items() %}
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
{% endfor %}
{% endif %}
{% if default_models %}
### Models
The following models are available by default:
{% for model in default_models %}
- `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %}
{% endif %}
### Prerequisite: PAT
Make sure you have access to a Clarifai Personal Access Token (PAT). You can get one by visiting [Clarifai](https://www.clarifai.com/).
## Running Llama Stack with Clarifai
You can do this via Conda (by building the distribution code) or via Docker, which has a pre-built image.
### Via Docker
This method allows you to get started quickly without having to build the distribution code.
```bash
LLAMA_STACK_PORT=5001
docker run \
-it \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT \
--env CLARIFAI_PAT=$CLARIFAI_PAT
```
### Via Conda
```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
--port $LLAMA_STACK_PORT \
--env CLARIFAI_PAT=$CLARIFAI_PAT
```


@@ -0,0 +1,175 @@
version: '2'
image_name: clarifai
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: clarifai
provider_type: remote::clarifai
config:
PAT: ${env.CLARIFAI_PAT}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
- provider_id: llama-guard-vision
provider_type: inline::llama-guard
config: {}
- provider_id: code-scanner
provider_type: inline::code-scanner
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/clarifai/trace_store.db}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/registry.db
models:
- metadata: {}
model_id: meta/Llama-3/Llama-3-8B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3-8B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
model_type: llm
- metadata: {}
model_id: meta/Llama-3/llama-3-70B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3-70B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3-70B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3-70B-Instruct
model_type: llm
- metadata: {}
model_id: meta/Llama-3/llama-3_1-8b-instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta/Llama-3/llama-3_2-3b-instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta/Llama-3/llama-3_3-70b-instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
provider_id: llama-guard
- shield_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: llama-guard-vision
- shield_id: CodeScanner
provider_id: code-scanner
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321
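With this safety-enabled run config, the registered shields can also be exercised from the client. A minimal sketch, assuming `llama-stack-client` is installed and the server is running with this file; the response field names are assumptions.
```python
# Sketch: list the shields registered above and run one against a user message.
# Field names on the returned objects are assumptions.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # matches `server.port` above

for shield in client.shields.list():
    print(shield.identifier)

result = client.safety.run_shield(
    shield_id="meta-llama/Llama-Guard-3-8B",
    messages=[{"role": "user", "content": "Tell me how to pick a lock."}],
    params={},
)
print(result.violation)
```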


@@ -0,0 +1,164 @@
version: '2'
image_name: clarifai
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: clarifai
provider_type: remote::clarifai
config:
PAT: ${env.CLARIFAI_PAT}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/clarifai/trace_store.db}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/registry.db
models:
- metadata: {}
model_id: meta/Llama-3/Llama-3-8B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3-8B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
model_type: llm
- metadata: {}
model_id: meta/Llama-3/llama-3-70B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3-70B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3-70B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3-70B-Instruct
model_type: llm
- metadata: {}
model_id: meta/Llama-3/llama-3_1-8b-instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta/Llama-3/llama-3_2-3b-instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta/Llama-3/llama-3_3-70b-instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: clarifai
provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321
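Against a server started from this `run.yaml`, the registered models (the Clarifai-backed LLMs plus the sentence-transformers embedding model) can be listed from the client; a short sketch, again assuming `llama-stack-client` and with attribute names as assumptions:
```python
# Sketch: list the models registered in run.yaml from a running server.
# Attribute names on the model objects are assumptions.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # `server.port` above

for model in client.models.list():
    print(model.model_type, model.identifier, "->", model.provider_resource_id)
```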


@@ -1,10 +0,0 @@
name: local-clarifai
distribution_spec:
description: Use Clarifai for running LLM inference
providers:
inference: remote::clarifai
memory: meta-reference
safety: meta-reference
agents: meta-reference
telemetry: meta-reference
image_type: conda