mirror of https://github.com/meta-llama/llama-stack.git
synced 2026-01-07 09:39:56 +00:00

Add Clarifai as Inference Provider

parent 2a24eb7f53
commit e2cc93c017

16 changed files with 1039 additions and 277 deletions
@@ -101,6 +101,38 @@
         "sentence-transformers --no-deps",
         "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
     ],
+    "clarifai": [
+        "aiosqlite",
+        "autoevals",
+        "blobfile",
+        "chardet",
+        "clarifai",
+        "datasets",
+        "faiss-cpu",
+        "fastapi",
+        "fire",
+        "httpx",
+        "matplotlib",
+        "mcp",
+        "nltk",
+        "numpy",
+        "openai",
+        "opentelemetry-exporter-otlp-proto-http",
+        "opentelemetry-sdk",
+        "pandas",
+        "pillow",
+        "psycopg2-binary",
+        "pymongo",
+        "pypdf",
+        "redis",
+        "requests",
+        "scikit-learn",
+        "scipy",
+        "sentencepiece",
+        "tqdm",
+        "transformers",
+        "uvicorn"
+    ],
     "dell": [
         "aiohttp",
         "aiosqlite",
docs/source/distributions/remote_hosted_distro/clarifai.md (new file, 77 lines)

@@ -0,0 +1,77 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Clarifai Distribution

```{toctree}
:maxdepth: 2
:hidden:

self
```

The `llamastack/distribution-clarifai` distribution consists of the following provider configurations.

| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::clarifai` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss` |

### Environment Variables

The following environment variables can be configured:

- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `CLARIFAI_PAT`: Clarifai PAT (default: ``)

### Models

The following models are available by default:

- `meta/Llama-3/Llama-3-8B-Instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
- `meta/Llama-3/llama-3-70B-Instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
- `meta/Llama-3/llama-3_1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta/Llama-3/llama-3_2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta/Llama-3/llama-3_3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`

### Prerequisite: PAT

Make sure you have access to a Clarifai PAT. You can get one by visiting [Clarifai](https://www.clarifai.com/).

## Running Llama Stack with Clarifai

You can do this via Conda (build the code) or Docker, which has a pre-built image.

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-clarifai \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```

### Via Conda

```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```
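The PAT prerequisite above can be checked outside the stack before building anything. A minimal sketch, assuming `pip install clarifai` and `CLARIFAI_PAT` set in the environment; it uses the same `client.app.Model(...).predict_by_bytes(...)` call the new adapter makes, and the model URL and prompt below are illustrative:

```python
# Sanity-check a Clarifai PAT against one of the distribution's default models.
# (Sketch; the model URL and prompt framing are illustrative, not from this commit.)
import os

from clarifai import client

pat = os.environ["CLARIFAI_PAT"]
# Same URL shape the adapter derives from "user_id/app_id/model_id":
model_url = "https://clarifai.com/meta/Llama-3/models/llama-3_1-8b-instruct"

response = client.app.Model(url=model_url, pat=pat).predict_by_bytes(
    b"{'role': 'user', 'content': 'Say hello in one word.'}\n",
    input_type="text",
    inference_params={"temperature": 0.2, "max_tokens": 16},
)
print(response.outputs[0].data.text.raw)
```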
docs/source/distributions/self_hosted_distro/clarifai.md (new file, 77 lines)

@@ -0,0 +1,77 @@
(Contents identical to docs/source/distributions/remote_hosted_distro/clarifai.md above.)
@@ -1,260 +0,0 @@ (deleted file)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import AsyncGenerator, List, Optional

from clarifai import client

from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.datatypes import Message, StopReason
from llama_models.llama3.api.tokenizer import Tokenizer

from llama_stack.apis.inference import *  # noqa: F403
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.utils.inference.augment_messages import (
    augment_messages_for_tools,
)
from llama_stack.providers.utils.inference.routable import RoutableProviderForModels

from .config import ClarifaiImplConfig


CLARIFAI_SUPPORTED_MODELS = {
    "Llama3.1-8B-Instruct": "meta/Llama-3/llama-3_1-8b-instruct",
    "Llama3.1-70B-Instruct": "meta/Llama-3/llama-3-70B-Instruct",
    "Llama3.2-3B-Instruct": "meta/Llama-3/llama-3_2-3b-instruct",
}


class ClarifaiInferenceAdapter(
    Inference, NeedsRequestProviderData, RoutableProviderForModels
):
    def __init__(self, config: ClarifaiImplConfig) -> None:
        RoutableProviderForModels.__init__(
            self, stack_to_provider_models_map=CLARIFAI_SUPPORTED_MODELS
        )
        self.config = config
        tokenizer = Tokenizer.get_instance()
        self.formatter = ChatFormat(tokenizer)

    @property
    def client(self) -> client:
        return client

    async def initialize(self) -> None:
        return

    async def shutdown(self) -> None:
        pass

    async def completion(
        self,
        model: str,
        content: InterleavedTextMedia,
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        raise NotImplementedError()

    def _messages_to_clarifai_messages(self, messages: list[Message]) -> bytes:
        clarifai_messages = ""
        for message in messages:
            if message.role == "ipython":
                role = "tool"
            else:
                role = message.role
            clarifai_messages += (
                f"{{'role': '{role}', 'content': '{message.content}'}}\n"
            )

        return clarifai_messages.encode()

    def get_clarifai_chat_options(self, request: ChatCompletionRequest) -> dict:
        options = {}
        if request.sampling_params is not None:
            for attr in {"temperature", "top_p", "top_k", "max_tokens"}:
                if getattr(request.sampling_params, attr):
                    options[attr] = getattr(request.sampling_params, attr)

        return options

    def resolve_clarifai_model(self, model_name: str) -> str:
        model = self.map_to_provider_model(model_name)
        assert (
            model is not None and model in CLARIFAI_SUPPORTED_MODELS.values()
        ), f"Unsupported model: {model_name}, use one of the supported models: {','.join(CLARIFAI_SUPPORTED_MODELS.keys())}"
        user_id, app_id, model_id = model.split("/")
        return f"https://clarifai.com/{user_id}/{app_id}/models/{model_id}"

    async def chat_completion(
        self,
        model: str,
        messages: List[Message],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        tools: Optional[List[ToolDefinition]] = None,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        request = ChatCompletionRequest(
            model=model,
            messages=messages,
            sampling_params=sampling_params,
            tools=tools or [],
            tool_choice=tool_choice,
            tool_prompt_format=tool_prompt_format,
            stream=stream,
            logprobs=logprobs,
        )

        # accumulate sampling params and other options to pass to clarifai
        options = self.get_clarifai_chat_options(request)
        clarifai_model = self.resolve_clarifai_model(request.model)
        messages = augment_messages_for_tools(request)

        if not request.stream:
            try:
                r = client.app.Model(
                    url=clarifai_model, pat=self.config.PAT
                ).predict_by_bytes(
                    self._messages_to_clarifai_messages(messages),
                    input_type="text",
                    inference_params=options,
                )
            except AssertionError as e:
                if "CLARIFAI_PAT" in str(e):
                    raise ValueError("Please provide a valid PAT for Clarifai")
                else:
                    raise e
            # TODO: Add stop reason to the response, currently not supported by clarifai.
            stop_reason = StopReason.end_of_turn
            completion_message = self.formatter.decode_assistant_message_from_content(
                r.outputs[0].data.text.raw, stop_reason
            )
            yield ChatCompletionResponse(
                completion_message=completion_message,
                logprobs=None,
            )
        else:
            yield ChatCompletionResponseStreamChunk(
                event=ChatCompletionResponseEvent(
                    event_type=ChatCompletionResponseEventType.start,
                    delta="",
                )
            )

            buffer = ""
            ipython = False
            stop_reason = StopReason.end_of_turn
            # TODO: Add support for stream, currently not supported by clarifai. But mocked for now.
            try:
                chunks = [
                    client.app.Model(url=clarifai_model, pat=self.config.PAT)
                    .predict_by_bytes(
                        self._messages_to_clarifai_messages(messages),
                        input_type="text",
                        inference_params=options,
                    )
                    .outputs[0]
                    .data.text.raw
                ]
            except AssertionError as e:
                if "CLARIFAI_PAT" in str(e):
                    raise ValueError("Please provide a valid PAT for Clarifai")
                else:
                    raise e
            for chunk in chunks:
                text = chunk

                if text is None:
                    continue

                # check if its a tool call ( aka starts with <|python_tag|> )
                if not ipython and text.startswith("<|python_tag|>"):
                    ipython = True
                    yield ChatCompletionResponseStreamChunk(
                        event=ChatCompletionResponseEvent(
                            event_type=ChatCompletionResponseEventType.progress,
                            delta=ToolCallDelta(
                                content="",
                                parse_status=ToolCallParseStatus.started,
                            ),
                        )
                    )
                    buffer += text
                    continue

                if ipython:
                    if text == "<|eot_id|>":
                        stop_reason = StopReason.end_of_turn
                        text = ""
                        continue
                    elif text == "<|eom_id|>":
                        stop_reason = StopReason.end_of_message
                        text = ""
                        continue

                    buffer += text
                    delta = ToolCallDelta(
                        content=text,
                        parse_status=ToolCallParseStatus.in_progress,
                    )

                    yield ChatCompletionResponseStreamChunk(
                        event=ChatCompletionResponseEvent(
                            event_type=ChatCompletionResponseEventType.progress,
                            delta=delta,
                            stop_reason=stop_reason,
                        )
                    )
                else:
                    buffer += text
                    yield ChatCompletionResponseStreamChunk(
                        event=ChatCompletionResponseEvent(
                            event_type=ChatCompletionResponseEventType.progress,
                            delta=text,
                            stop_reason=stop_reason,
                        )
                    )

            # parse tool calls and report errors
            message = self.formatter.decode_assistant_message_from_content(
                buffer, stop_reason
            )
            parsed_tool_calls = len(message.tool_calls) > 0
            if ipython and not parsed_tool_calls:
                yield ChatCompletionResponseStreamChunk(
                    event=ChatCompletionResponseEvent(
                        event_type=ChatCompletionResponseEventType.progress,
                        delta=ToolCallDelta(
                            content="",
                            parse_status=ToolCallParseStatus.failure,
                        ),
                        stop_reason=stop_reason,
                    )
                )

            for tool_call in message.tool_calls:
                yield ChatCompletionResponseStreamChunk(
                    event=ChatCompletionResponseEvent(
                        event_type=ChatCompletionResponseEventType.progress,
                        delta=ToolCallDelta(
                            content=tool_call,
                            parse_status=ToolCallParseStatus.success,
                        ),
                        stop_reason=stop_reason,
                    )
                )

            yield ChatCompletionResponseStreamChunk(
                event=ChatCompletionResponseEvent(
                    event_type=ChatCompletionResponseEventType.complete,
                    delta="",
                    stop_reason=stop_reason,
                )
            )
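Before it was removed, `_messages_to_clarifai_messages` flattened the chat into newline-separated pseudo-JSON records rather than real JSON. A sketch of the payload it produced (the message list here is illustrative):

```python
# Payload shape produced by the removed _messages_to_clarifai_messages helper
# (sketch; example messages are illustrative).
messages = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "hello"},
]
payload = "".join(
    f"{{'role': '{m['role']}', 'content': '{m['content']}'}}\n" for m in messages
).encode()
print(payload)
# b"{'role': 'user', 'content': 'hi'}\n{'role': 'assistant', 'content': 'hello'}\n"
```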
@@ -184,8 +184,8 @@ def available_providers() -> List[ProviderSpec]:
             pip_packages=[
                 "clarifai",
             ],
-            module="llama_stack.providers.adapters.inference.clarifai",
-            config_class="llama_stack.providers.adapters.inference.clarifai.ClarifaiImplConfig",
+            module="llama_stack.providers.remote.inference.clarifai",
+            config_class="llama_stack.providers.remote.inference.clarifai.ClarifaiImplConfig",
         ),
     ),
     remote_provider_spec(
@@ -9,9 +9,7 @@ from .config import ClarifaiImplConfig


 async def get_adapter_impl(config: ClarifaiImplConfig, _deps):
-    assert isinstance(
-        config, ClarifaiImplConfig
-    ), f"Unexpected config type: {type(config)}"
+    assert isinstance(config, ClarifaiImplConfig), f"Unexpected config type: {type(config)}"
     impl = ClarifaiInferenceAdapter(config)
     await impl.initialize()
     return impl
llama_stack/providers/remote/inference/clarifai/clarifai.py (new file, 204 lines)

@@ -0,0 +1,204 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import AsyncGenerator, List, Optional, Union

from clarifai import client

from llama_stack import logcat
from llama_stack.apis.common.content_types import (
    InterleavedContent,
    InterleavedContentItem,
)
from llama_stack.apis.inference import (
    ChatCompletionRequest,
    ChatCompletionResponse,
    CompletionRequest,
    EmbeddingsResponse,
    EmbeddingTaskType,
    Inference,
    LogProbConfig,
    Message,
    ResponseFormat,
    ResponseFormatType,
    SamplingParams,
    TextTruncation,
    ToolChoice,
    ToolConfig,
    ToolDefinition,
    ToolPromptFormat,
)
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.utils.inference.model_registry import (
    ModelRegistryHelper,
)
from llama_stack.providers.utils.inference.openai_compat import (
    get_sampling_options,
    process_chat_completion_response,
    process_chat_completion_stream_response,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
    chat_completion_request_to_prompt,
    request_has_media,
)

from .config import ClarifaiImplConfig
from .models import MODEL_ENTRIES


class ClarifaiInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
    def __init__(self, config: ClarifaiImplConfig) -> None:
        ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
        self.config = config

    async def initialize(self) -> None:
        return

    async def shutdown(self) -> None:
        pass

    def _get_client(self) -> client:
        return client

    async def completion(
        self,
        model_id: str,
        content: InterleavedContent,
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        raise NotImplementedError()

    def resolve_clarifai_model(self, model_name: str) -> str:
        # model = self.get_llama_model(model_name)
        # assert (
        #     model is not None and model in CLARIFAI_SUPPORTED_MODELS.values()
        # ), f"Unsupported model: {model_name}, use one of the supported models: {','.join(CLARIFAI_SUPPORTED_MODELS.keys())}"
        user_id, app_id, model_id = model_name.split("/")
        return f"https://clarifai.com/{user_id}/{app_id}/models/{model_id}"

    # async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse:
    #     params = await self._get_params(request)
    #     model_url = self.resolve_clarifai_model(request.model)
    #     r = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
    #     return process_completion_response(r)

    # async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
    #     params = await self._get_params(request)
    #     model_url = self.resolve_clarifai_model(request.model)

    #     async def _to_async_generator():
    #         s = self._get_client().app.Model(url=model_url, pat=self.config.PAT).stream_by_bytes(**params)
    #         for chunk in s:
    #             yield chunk

    #     stream = _to_async_generator()
    #     async for chunk in process_completion_stream_response(stream):
    #         yield chunk

    def _build_options(
        self,
        sampling_params: Optional[SamplingParams],
        logprobs: Optional[LogProbConfig],
        fmt: ResponseFormat,
    ) -> dict:
        options = get_sampling_options(sampling_params)
        if fmt:
            if fmt.type == ResponseFormatType.json_schema.value:
                options["response_format"] = {
                    "type": "json_object",
                    "schema": fmt.json_schema,
                }
            elif fmt.type == ResponseFormatType.grammar.value:
                raise NotImplementedError("Grammar response format not supported yet")
            else:
                raise ValueError(f"Unknown response format {fmt.type}")

        if logprobs and logprobs.top_k:
            if logprobs.top_k != 1:
                raise ValueError(
                    f"Unsupported value: Clarifai only supports logprobs top_k=1. {logprobs.top_k} was provided",
                )
            options["logprobs"] = 1

        return options

    async def chat_completion(
        self,
        model_id: str,
        messages: List[Message],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        tools: Optional[List[ToolDefinition]] = None,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = None,
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
        tool_config: Optional[ToolConfig] = None,
    ) -> AsyncGenerator:
        model = await self.model_store.get_model(model_id)
        request = ChatCompletionRequest(
            model=model.provider_resource_id,
            messages=messages,
            sampling_params=sampling_params,
            tools=tools or [],
            response_format=response_format,
            stream=stream,
            logprobs=logprobs,
            tool_config=tool_config,
        )

        if stream:
            return self._stream_chat_completion(request)
        else:
            return await self._nonstream_chat_completion(request)

    async def _nonstream_chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
        params = await self._get_params(request)
        model_url = self.resolve_clarifai_model(request.model)
        r = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
        return process_chat_completion_response(r)

    async def _stream_chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator:
        params = await self._get_params(request)
        model_url = self.resolve_clarifai_model(request.model)

        async def _to_async_generator():
            s = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
            for chunk in s:
                yield chunk

        stream = _to_async_generator()
        async for chunk in process_chat_completion_stream_response(stream):
            yield chunk

    async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict:
        input_dict = {}
        media_present = request_has_media(request)
        llama_model = self.get_llama_model(request.model)
        if isinstance(request, ChatCompletionRequest):
            assert not media_present, "Clarifai does not support media for ChatCompletion requests"
            input_dict["input_bytes"] = (await chat_completion_request_to_prompt(request, llama_model)).encode()

        params = {
            **input_dict,
            "input_type": "text",
            "inference_params": self._build_options(request.sampling_params, request.logprobs, request.response_format),
        }
        logcat.debug("inference", f"params to clarifai: {params}")
        return params

    async def embeddings(
        self,
        model_id: str,
        contents: List[str] | List[InterleavedContentItem],
        text_truncation: Optional[TextTruncation] = TextTruncation.none,
        output_dimension: Optional[int] = None,
        task_type: Optional[EmbeddingTaskType] = None,
    ) -> EmbeddingsResponse:
        raise NotImplementedError()
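Tracing one request through the new adapter: `resolve_clarifai_model` splits the provider model id into `user_id/app_id/model_id` and builds the Clarifai model URL, and `_get_params` packs the rendered prompt into the keyword arguments for `predict_by_bytes`. A sketch of the intermediate values, assuming a plain-text request (the prompt bytes and sampling values are illustrative):

```python
# Intermediate values for one chat_completion call through the new adapter
# (sketch; prompt bytes and sampling values are illustrative).
provider_model_id = "meta/Llama-3/llama-3_1-8b-instruct"

user_id, app_id, model_id = provider_model_id.split("/")
model_url = f"https://clarifai.com/{user_id}/{app_id}/models/{model_id}"
# -> https://clarifai.com/meta/Llama-3/models/llama-3_1-8b-instruct

params = {
    "input_bytes": b"<|begin_of_text|>...rendered Llama 3 prompt...",
    "input_type": "text",
    "inference_params": {"temperature": 0.7, "max_tokens": 512},
}
# The adapter then calls:
#   client.app.Model(url=model_url, pat=config.PAT).predict_by_bytes(**params)
```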
@@ -4,11 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Optional
+from typing import Any, Dict

-from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field

+from llama_stack.schema_utils import json_schema_type
+

 @json_schema_type
 class ClarifaiImplConfig(BaseModel):
@@ -16,3 +17,9 @@ class ClarifaiImplConfig(BaseModel):
         default=None,
         description="The Clarifai Personal Access Token (PAT) to use for authentication.",
     )
+
+    @classmethod
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
+        return {
+            "PAT": "${env.CLARIFAI_PAT}",
+        }
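The new `sample_run_config` classmethod is what lets the template generator emit a provider config without a hard-coded secret. A sketch of how it surfaces in the generated YAML (the import path matches the one used by the template in this commit):

```python
# sample_run_config() output, as consumed by the template generator (sketch).
from llama_stack.providers.remote.inference.clarifai import ClarifaiImplConfig

print(ClarifaiImplConfig.sample_run_config())
# {'PAT': '${env.CLARIFAI_PAT}'}
#
# In the generated run.yaml this becomes:
#   config:
#     PAT: ${env.CLARIFAI_PAT}
# and the stack substitutes the environment variable at startup.
```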
llama_stack/providers/remote/inference/clarifai/models.py (new file, 33 lines)

@@ -0,0 +1,33 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
    build_hf_repo_model_entry,
)

MODEL_ENTRIES = [
    build_hf_repo_model_entry(
        "meta/Llama-3/Llama-3-8B-Instruct",
        CoreModelId.llama3_8b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3-70B-Instruct",
        CoreModelId.llama3_70b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3_1-8b-instruct",
        CoreModelId.llama3_1_8b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3_2-3b-instruct",
        CoreModelId.llama3_2_3b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3_3-70b-instruct",
        CoreModelId.llama3_3_70b_instruct.value,
    ),
]
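Each `build_hf_repo_model_entry` call pairs a Clarifai-side id with a canonical Llama model, which is how both the provider model ids and the `meta-llama/...` aliases end up in the generated run.yaml. The resulting alias table, restated from this commit's generated docs:

```python
# Alias -> provider_model_id mapping produced by MODEL_ENTRIES (restated from
# the generated run.yaml in this commit; not new API usage).
CLARIFAI_ALIASES = {
    "meta-llama/Llama-3-8B-Instruct": "meta/Llama-3/Llama-3-8B-Instruct",
    "meta-llama/Llama-3-70B-Instruct": "meta/Llama-3/llama-3-70B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct": "meta/Llama-3/llama-3_1-8b-instruct",
    "meta-llama/Llama-3.2-3B-Instruct": "meta/Llama-3/llama-3_2-3b-instruct",
    "meta-llama/Llama-3.3-70B-Instruct": "meta/Llama-3/llama-3_3-70b-instruct",
}
```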
llama_stack/templates/clarifai/__init__.py (new file, 7 lines)

@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .clarifai import get_distribution_template  # noqa: F401
llama_stack/templates/clarifai/build.yaml (new file, 30 lines)

@@ -0,0 +1,30 @@
version: '2'
distribution_spec:
  description: Use Clarifai for running LLM inference
  providers:
    inference:
    - remote::clarifai
    vector_io:
    - inline::faiss
    safety:
    - inline::llama-guard
    agents:
    - inline::meta-reference
    telemetry:
    - inline::meta-reference
    eval:
    - inline::meta-reference
    datasetio:
    - remote::huggingface
    - inline::localfs
    scoring:
    - inline::basic
    - inline::llm-as-judge
    - inline::braintrust
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
image_type: conda
llama_stack/templates/clarifai/clarifai.py (new file, 160 lines)

@@ -0,0 +1,160 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from pathlib import Path

from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
    ModelInput,
    Provider,
    ShieldInput,
    ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.clarifai import ClarifaiImplConfig
from llama_stack.providers.remote.inference.clarifai.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


def get_distribution_template() -> DistributionTemplate:
    providers = {
        "inference": ["remote::clarifai"],
        "vector_io": ["inline::faiss"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
    }
    name = "clarifai"
    inference_provider = Provider(
        provider_id="clarifai",
        provider_type="remote::clarifai",
        config=ClarifaiImplConfig.sample_run_config(),
    )
    vector_io_provider = Provider(
        provider_id="faiss",
        provider_type="inline::faiss",
        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
    )
    embedding_provider = Provider(
        provider_id="sentence-transformers",
        provider_type="inline::sentence-transformers",
        config=SentenceTransformersInferenceConfig.sample_run_config(),
    )
    available_models = {
        "clarifai": MODEL_ENTRIES,
    }
    default_models = get_model_registry(available_models)
    default_tool_groups = [
        ToolGroupInput(
            toolgroup_id="builtin::websearch",
            provider_id="tavily-search",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::code_interpreter",
            provider_id="code-interpreter",
        ),
    ]
    embedding_model = ModelInput(
        model_id="all-MiniLM-L6-v2",
        provider_id="sentence-transformers",
        model_type=ModelType.embedding,
        metadata={
            "embedding_dimension": 384,
        },
    )

    return DistributionTemplate(
        name=name,
        distro_type="remote_hosted",
        description="Use Clarifai for running LLM inference",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        available_models_by_provider=available_models,
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider, embedding_provider],
                    "vector_io": [vector_io_provider],
                },
                default_models=default_models + [embedding_model],
                default_tool_groups=default_tool_groups,
                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
            ),
            "run-with-safety.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [
                        inference_provider,
                        embedding_provider,
                    ],
                    "vector_io": [vector_io_provider],
                    "safety": [
                        Provider(
                            provider_id="llama-guard",
                            provider_type="inline::llama-guard",
                            config={},
                        ),
                        Provider(
                            provider_id="llama-guard-vision",
                            provider_type="inline::llama-guard",
                            config={},
                        ),
                        Provider(
                            provider_id="code-scanner",
                            provider_type="inline::code-scanner",
                            config={},
                        ),
                    ],
                },
                default_models=[
                    *default_models,
                    embedding_model,
                ],
                default_shields=[
                    ShieldInput(
                        shield_id="meta-llama/Llama-Guard-3-8B",
                        provider_id="llama-guard",
                    ),
                    ShieldInput(
                        shield_id="meta-llama/Llama-Guard-3-11B-Vision",
                        provider_id="llama-guard-vision",
                    ),
                    ShieldInput(
                        shield_id="CodeScanner",
                        provider_id="code-scanner",
                    ),
                ],
                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
                "5001",
                "Port for the Llama Stack distribution server",
            ),
            "CLARIFAI_PAT": (
                "",
                "Clarifai PAT",
            ),
        },
    )
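A quick way to exercise the new template without running codegen end to end is to import it directly. A minimal sketch, assuming `DistributionTemplate` exposes its constructor arguments as attributes:

```python
# Smoke-test the new distribution template (sketch; attribute access on
# DistributionTemplate is assumed to mirror its constructor arguments).
from llama_stack.templates.clarifai import get_distribution_template

template = get_distribution_template()
print(template.name)                 # clarifai
print(sorted(template.run_configs))  # ['run-with-safety.yaml', 'run.yaml']
```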
llama_stack/templates/clarifai/doc_template.md (new file, 68 lines)

@@ -0,0 +1,68 @@
---
orphan: true
---
# Clarifai Distribution

```{toctree}
:maxdepth: 2
:hidden:

self
```

The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.

{{ providers_table }}

{% if run_config_env_vars %}
### Environment Variables

The following environment variables can be configured:

{% for var, (default_value, description) in run_config_env_vars.items() %}
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
{% endfor %}
{% endif %}

{% if default_models %}
### Models

The following models are available by default:

{% for model in default_models %}
- `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %}
{% endif %}


### Prerequisite: PAT

Make sure you have access to a Clarifai PAT. You can get one by visiting [Clarifai](https://www.clarifai.com/).


## Running Llama Stack with Clarifai

You can do this via Conda (build the code) or Docker, which has a pre-built image.

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```

### Via Conda

```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```
llama_stack/templates/clarifai/run-with-safety.yaml (new file, 175 lines)

@@ -0,0 +1,175 @@
version: '2'
image_name: clarifai
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: clarifai
    provider_type: remote::clarifai
    config:
      PAT: ${env.CLARIFAI_PAT}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  - provider_id: llama-guard-vision
    provider_type: inline::llama-guard
    config: {}
  - provider_id: code-scanner
    provider_type: inline::code-scanner
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/clarifai/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config: {}
  - provider_id: localfs
    provider_type: inline::localfs
    config: {}
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/registry.db
models:
- metadata: {}
  model_id: meta/Llama-3/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_1-8b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_2-3b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.2-3B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_3-70b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
  provider_id: llama-guard
- shield_id: meta-llama/Llama-Guard-3-11B-Vision
  provider_id: llama-guard-vision
- shield_id: CodeScanner
  provider_id: code-scanner
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
server:
  port: 8321
llama_stack/templates/clarifai/run.yaml (new file, 164 lines)

@@ -0,0 +1,164 @@
version: '2'
image_name: clarifai
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: clarifai
    provider_type: remote::clarifai
    config:
      PAT: ${env.CLARIFAI_PAT}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/clarifai/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config: {}
  - provider_id: localfs
    provider_type: inline::localfs
    config: {}
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/registry.db
models:
- metadata: {}
  model_id: meta/Llama-3/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_1-8b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_2-3b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.2-3B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_3-70b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
server:
  port: 8321
@@ -1,10 +0,0 @@ (deleted file)
name: local-clarifai
distribution_spec:
  description: Use Clarifai for running LLM inference
  providers:
    inference: remote::clarifai
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda