Mirror of https://github.com/meta-llama/llama-stack.git (synced 2026-01-07 09:49:56 +00:00)

Add Clarifai as Inference Provider

parent 2a24eb7f53, commit e2cc93c017
16 changed files with 1039 additions and 277 deletions
@@ -101,6 +101,38 @@
     "sentence-transformers --no-deps",
     "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ],
+  "clarifai": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "clarifai",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn"
+  ],
   "dell": [
     "aiohttp",
     "aiosqlite",
docs/source/distributions/remote_hosted_distro/clarifai.md (new file, 77 lines)
@@ -0,0 +1,77 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Clarifai Distribution

```{toctree}
:maxdepth: 2
:hidden:

self
```

The `llamastack/distribution-clarifai` distribution consists of the following provider configurations.

| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::clarifai` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss` |

### Environment Variables

The following environment variables can be configured:

- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `CLARIFAI_PAT`: Clarifai PAT (default: ``)

### Models

The following models are available by default:

- `meta/Llama-3/Llama-3-8B-Instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
- `meta/Llama-3/llama-3-70B-Instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
- `meta/Llama-3/llama-3_1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta/Llama-3/llama-3_2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta/Llama-3/llama-3_3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`

### Prerequisite: PAT

Make sure you have access to a Clarifai Personal Access Token (PAT). You can get one by visiting [Clarifai](https://www.clarifai.com/).

## Running Llama Stack with Clarifai

You can do this via Conda (build the code) or Docker, which has a pre-built image.

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-clarifai \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```

### Via Conda

```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```
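Once the server is running via either method, requests can be sent from any Llama Stack client. Below is a minimal sketch using the `llama-stack-client` Python package; the base URL matches the port used above and the model alias is one of the defaults listed in this file, but the client package itself is not part of this commit, so treat the exact call shape as an assumption:

```python
# Minimal sketch: querying the Clarifai-backed distribution from Python.
# Assumes the server started above is listening on localhost:5001 and that
# the llama-stack-client package is installed (not shipped by this commit).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # one of the default aliases above
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.completion_message.content)
```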
docs/source/distributions/self_hosted_distro/clarifai.md (new file, 77 lines)
@@ -0,0 +1,77 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Clarifai Distribution

```{toctree}
:maxdepth: 2
:hidden:

self
```

The `llamastack/distribution-clarifai` distribution consists of the following provider configurations.

| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::clarifai` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss` |

### Environment Variables

The following environment variables can be configured:

- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `CLARIFAI_PAT`: Clarifai PAT (default: ``)

### Models

The following models are available by default:

- `meta/Llama-3/Llama-3-8B-Instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
- `meta/Llama-3/llama-3-70B-Instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
- `meta/Llama-3/llama-3_1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta/Llama-3/llama-3_2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta/Llama-3/llama-3_3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`

### Prerequisite: PAT

Make sure you have access to a Clarifai Personal Access Token (PAT). You can get one by visiting [Clarifai](https://www.clarifai.com/).

## Running Llama Stack with Clarifai

You can do this via Conda (build the code) or Docker, which has a pre-built image.

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-clarifai \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```

### Via Conda

```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```
@@ -1,260 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import AsyncGenerator, List, Optional

from clarifai import client

from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.datatypes import Message, StopReason
from llama_models.llama3.api.tokenizer import Tokenizer

from llama_stack.apis.inference import *  # noqa: F403
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.utils.inference.augment_messages import (
    augment_messages_for_tools,
)
from llama_stack.providers.utils.inference.routable import RoutableProviderForModels

from .config import ClarifaiImplConfig


CLARIFAI_SUPPORTED_MODELS = {
    "Llama3.1-8B-Instruct": "meta/Llama-3/llama-3_1-8b-instruct",
    "Llama3.1-70B-Instruct": "meta/Llama-3/llama-3-70B-Instruct",
    "Llama3.2-3B-Instruct": "meta/Llama-3/llama-3_2-3b-instruct",
}


class ClarifaiInferenceAdapter(
    Inference, NeedsRequestProviderData, RoutableProviderForModels
):
    def __init__(self, config: ClarifaiImplConfig) -> None:
        RoutableProviderForModels.__init__(
            self, stack_to_provider_models_map=CLARIFAI_SUPPORTED_MODELS
        )
        self.config = config
        tokenizer = Tokenizer.get_instance()
        self.formatter = ChatFormat(tokenizer)

    @property
    def client(self) -> client:
        return client

    async def initialize(self) -> None:
        return

    async def shutdown(self) -> None:
        pass

    async def completion(
        self,
        model: str,
        content: InterleavedTextMedia,
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        raise NotImplementedError()

    def _messages_to_clarifai_messages(self, messages: list[Message]) -> bytes:
        clarifai_messages = ""
        for message in messages:
            if message.role == "ipython":
                role = "tool"
            else:
                role = message.role
            clarifai_messages += (
                f"{{'role': '{role}', 'content': '{message.content}'}}\n"
            )

        return clarifai_messages.encode()

    def get_clarifai_chat_options(self, request: ChatCompletionRequest) -> dict:
        options = {}
        if request.sampling_params is not None:
            for attr in {"temperature", "top_p", "top_k", "max_tokens"}:
                if getattr(request.sampling_params, attr):
                    options[attr] = getattr(request.sampling_params, attr)

        return options

    def resolve_clarifai_model(self, model_name: str) -> str:
        model = self.map_to_provider_model(model_name)
        assert (
            model is not None and model in CLARIFAI_SUPPORTED_MODELS.values()
        ), f"Unsupported model: {model_name}, use one of the supported models: {','.join(CLARIFAI_SUPPORTED_MODELS.keys())}"
        user_id, app_id, model_id = model.split("/")
        return f"https://clarifai.com/{user_id}/{app_id}/models/{model_id}"

    async def chat_completion(
        self,
        model: str,
        messages: List[Message],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        tools: Optional[List[ToolDefinition]] = None,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        request = ChatCompletionRequest(
            model=model,
            messages=messages,
            sampling_params=sampling_params,
            tools=tools or [],
            tool_choice=tool_choice,
            tool_prompt_format=tool_prompt_format,
            stream=stream,
            logprobs=logprobs,
        )

        # accumulate sampling params and other options to pass to clarifai
        options = self.get_clarifai_chat_options(request)
        clarifai_model = self.resolve_clarifai_model(request.model)
        messages = augment_messages_for_tools(request)

        if not request.stream:
            try:
                r = client.app.Model(
                    url=clarifai_model, pat=self.config.PAT
                ).predict_by_bytes(
                    self._messages_to_clarifai_messages(messages),
                    input_type="text",
                    inference_params=options,
                )
            except AssertionError as e:
                if "CLARIFAI_PAT" in str(e):
                    raise ValueError("Please provide a valid PAT for Clarifai")
                else:
                    raise e
            # TODO : Add stop reason to the response, currently not supported by clarifai.
            stop_reason = StopReason.end_of_turn
            completion_message = self.formatter.decode_assistant_message_from_content(
                r.outputs[0].data.text.raw, stop_reason
            )
            yield ChatCompletionResponse(
                completion_message=completion_message,
                logprobs=None,
            )
        else:
            yield ChatCompletionResponseStreamChunk(
                event=ChatCompletionResponseEvent(
                    event_type=ChatCompletionResponseEventType.start,
                    delta="",
                )
            )

            buffer = ""
            ipython = False
            stop_reason = StopReason.end_of_turn
            # TODO: Add support for stream, currently not supported by clarifai. But mocked for now.
            try:
                chunks = [
                    client.app.Model(url=clarifai_model, pat=self.config.PAT)
                    .predict_by_bytes(
                        self._messages_to_clarifai_messages(messages),
                        input_type="text",
                        inference_params=options,
                    )
                    .outputs[0]
                    .data.text.raw
                ]
            except AssertionError as e:
                if "CLARIFAI_PAT" in str(e):
                    raise ValueError("Please provide a valid PAT for Clarifai")
                else:
                    raise e
            for chunk in chunks:
                text = chunk

                if text is None:
                    continue

                # check if its a tool call ( aka starts with <|python_tag|> )
                if not ipython and text.startswith("<|python_tag|>"):
                    ipython = True
                    yield ChatCompletionResponseStreamChunk(
                        event=ChatCompletionResponseEvent(
                            event_type=ChatCompletionResponseEventType.progress,
                            delta=ToolCallDelta(
                                content="",
                                parse_status=ToolCallParseStatus.started,
                            ),
                        )
                    )
                    buffer += text
                    continue

                if ipython:
                    if text == "<|eot_id|>":
                        stop_reason = StopReason.end_of_turn
                        text = ""
                        continue
                    elif text == "<|eom_id|>":
                        stop_reason = StopReason.end_of_message
                        text = ""
                        continue

                    buffer += text
                    delta = ToolCallDelta(
                        content=text,
                        parse_status=ToolCallParseStatus.in_progress,
                    )

                    yield ChatCompletionResponseStreamChunk(
                        event=ChatCompletionResponseEvent(
                            event_type=ChatCompletionResponseEventType.progress,
                            delta=delta,
                            stop_reason=stop_reason,
                        )
                    )
                else:
                    buffer += text
                    yield ChatCompletionResponseStreamChunk(
                        event=ChatCompletionResponseEvent(
                            event_type=ChatCompletionResponseEventType.progress,
                            delta=text,
                            stop_reason=stop_reason,
                        )
                    )

            # parse tool calls and report errors
            message = self.formatter.decode_assistant_message_from_content(
                buffer, stop_reason
            )
            parsed_tool_calls = len(message.tool_calls) > 0
            if ipython and not parsed_tool_calls:
                yield ChatCompletionResponseStreamChunk(
                    event=ChatCompletionResponseEvent(
                        event_type=ChatCompletionResponseEventType.progress,
                        delta=ToolCallDelta(
                            content="",
                            parse_status=ToolCallParseStatus.failure,
                        ),
                        stop_reason=stop_reason,
                    )
                )

            for tool_call in message.tool_calls:
                yield ChatCompletionResponseStreamChunk(
                    event=ChatCompletionResponseEvent(
                        event_type=ChatCompletionResponseEventType.progress,
                        delta=ToolCallDelta(
                            content=tool_call,
                            parse_status=ToolCallParseStatus.success,
                        ),
                        stop_reason=stop_reason,
                    )
                )

            yield ChatCompletionResponseStreamChunk(
                event=ChatCompletionResponseEvent(
                    event_type=ChatCompletionResponseEventType.complete,
                    delta="",
                    stop_reason=stop_reason,
                )
            )
@@ -184,8 +184,8 @@ def available_providers() -> List[ProviderSpec]:
             pip_packages=[
                 "clarifai",
             ],
-            module="llama_stack.providers.adapters.inference.clarifai",
-            config_class="llama_stack.providers.adapters.inference.clarifai.ClarifaiImplConfig",
+            module="llama_stack.providers.remote.inference.clarifai",
+            config_class="llama_stack.providers.remote.inference.clarifai.ClarifaiImplConfig",
         ),
     ),
     remote_provider_spec(
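Note that this hunk only swaps the two module-path lines from the old `adapters` package to the new `remote` package. For orientation, the enclosing registry entry presumably looks like the sketch below; the field names follow `AdapterSpec` in `llama_stack.providers.datatypes`, and everything except the two edited lines is an assumption rather than something shown in the diff:

```python
# Sketch of the surrounding registry entry; only the module/config_class lines
# are confirmed by the hunk above, the rest is assumed from the registry's shape.
from llama_stack.providers.datatypes import AdapterSpec, Api, remote_provider_spec

clarifai_spec = remote_provider_spec(
    api=Api.inference,
    adapter=AdapterSpec(
        adapter_type="clarifai",
        pip_packages=["clarifai"],
        module="llama_stack.providers.remote.inference.clarifai",
        config_class="llama_stack.providers.remote.inference.clarifai.ClarifaiImplConfig",
    ),
)
```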
@@ -9,9 +9,7 @@ from .config import ClarifaiImplConfig


 async def get_adapter_impl(config: ClarifaiImplConfig, _deps):
-    assert isinstance(
-        config, ClarifaiImplConfig
-    ), f"Unexpected config type: {type(config)}"
+    assert isinstance(config, ClarifaiImplConfig), f"Unexpected config type: {type(config)}"
     impl = ClarifaiInferenceAdapter(config)
     await impl.initialize()
     return impl
llama_stack/providers/remote/inference/clarifai/clarifai.py (new file, 204 lines)
@@ -0,0 +1,204 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import AsyncGenerator, List, Optional, Union

from clarifai import client

from llama_stack import logcat
from llama_stack.apis.common.content_types import (
    InterleavedContent,
    InterleavedContentItem,
)
from llama_stack.apis.inference import (
    ChatCompletionRequest,
    ChatCompletionResponse,
    CompletionRequest,
    EmbeddingsResponse,
    EmbeddingTaskType,
    Inference,
    LogProbConfig,
    Message,
    ResponseFormat,
    ResponseFormatType,
    SamplingParams,
    TextTruncation,
    ToolChoice,
    ToolConfig,
    ToolDefinition,
    ToolPromptFormat,
)
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.utils.inference.model_registry import (
    ModelRegistryHelper,
)
from llama_stack.providers.utils.inference.openai_compat import (
    get_sampling_options,
    process_chat_completion_response,
    process_chat_completion_stream_response,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
    chat_completion_request_to_prompt,
    request_has_media,
)

from .config import ClarifaiImplConfig
from .models import MODEL_ENTRIES


class ClarifaiInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
    def __init__(self, config: ClarifaiImplConfig) -> None:
        ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
        self.config = config

    async def initialize(self) -> None:
        return

    async def shutdown(self) -> None:
        pass

    def _get_client(self) -> client:
        return client

    async def completion(
        self,
        model_id: str,
        content: InterleavedContent,
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        raise NotImplementedError()

    def resolve_clarifai_model(self, model_name: str) -> str:
        # model = self.get_llama_model(model_name)
        # assert (
        #     model is not None and model in CLARIFAI_SUPPORTED_MODELS.values()
        # ), f"Unsupported model: {model_name}, use one of the supported models: {','.join(CLARIFAI_SUPPORTED_MODELS.keys())}"
        user_id, app_id, model_id = model_name.split("/")
        return f"https://clarifai.com/{user_id}/{app_id}/models/{model_id}"

    # async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse:
    #     params = await self._get_params(request)
    #     model_url = self.resolve_clarifai_model(request.model)
    #     r = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
    #     return process_completion_response(r)

    # async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
    #     params = await self._get_params(request)
    #     model_url = self.resolve_clarifai_model(request.model)

    #     async def _to_async_generator():
    #         s = self._get_client().app.Model(url=model_url, pat=self.config.PAT).stream_by_bytes(**params)
    #         for chunk in s:
    #             yield chunk

    #     stream = _to_async_generator()
    #     async for chunk in process_completion_stream_response(stream):
    #         yield chunk

    def _build_options(
        self,
        sampling_params: Optional[SamplingParams],
        logprobs: Optional[LogProbConfig],
        fmt: ResponseFormat,
    ) -> dict:
        options = get_sampling_options(sampling_params)
        if fmt:
            if fmt.type == ResponseFormatType.json_schema.value:
                options["response_format"] = {
                    "type": "json_object",
                    "schema": fmt.json_schema,
                }
            elif fmt.type == ResponseFormatType.grammar.value:
                raise NotImplementedError("Grammar response format not supported yet")
            else:
                raise ValueError(f"Unknown response format {fmt.type}")

        if logprobs and logprobs.top_k:
            if logprobs.top_k != 1:
                raise ValueError(
                    f"Unsupported value: Clarifai only supports logprobs top_k=1. {logprobs.top_k} was provided",
                )
            options["logprobs"] = 1

        return options

    async def chat_completion(
        self,
        model_id: str,
        messages: List[Message],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        tools: Optional[List[ToolDefinition]] = None,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = None,
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
        tool_config: Optional[ToolConfig] = None,
    ) -> AsyncGenerator:
        model = await self.model_store.get_model(model_id)
        request = ChatCompletionRequest(
            model=model.provider_resource_id,
            messages=messages,
            sampling_params=sampling_params,
            tools=tools or [],
            response_format=response_format,
            stream=stream,
            logprobs=logprobs,
            tool_config=tool_config,
        )

        if stream:
            return self._stream_chat_completion(request)
        else:
            return await self._nonstream_chat_completion(request)

    async def _nonstream_chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
        params = await self._get_params(request)
        model_url = self.resolve_clarifai_model(request.model)
        r = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
        return process_chat_completion_response(r)

    async def _stream_chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator:
        params = await self._get_params(request)
        model_url = self.resolve_clarifai_model(request.model)

        async def _to_async_generator():
            s = self._get_client().app.Model(url=model_url, pat=self.config.PAT).predict_by_bytes(**params)
            for chunk in s:
                yield chunk

        stream = _to_async_generator()
        async for chunk in process_chat_completion_stream_response(stream):
            yield chunk

    async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict:
        input_dict = {}
        media_present = request_has_media(request)
        llama_model = self.get_llama_model(request.model)
        if isinstance(request, ChatCompletionRequest):
            assert not media_present, "Clarifai does not support media for ChatCompletion requests"
            input_dict["input_bytes"] = (await chat_completion_request_to_prompt(request, llama_model)).encode()

        params = {
            **input_dict,
            "input_type": "text",
            "inference_params": self._build_options(request.sampling_params, request.logprobs, request.response_format),
        }
        logcat.debug("inference", f"params to clarifai: {params}")
        return params

    async def embeddings(
        self,
        model_id: str,
        contents: List[str] | List[InterleavedContentItem],
        text_truncation: Optional[TextTruncation] = TextTruncation.none,
        output_dimension: Optional[int] = None,
        task_type: Optional[EmbeddingTaskType] = None,
    ) -> EmbeddingsResponse:
        raise NotImplementedError()
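Stripped of the Llama Stack plumbing, the adapter above funnels every request into a single Clarifai SDK call. The sketch below mirrors `_nonstream_chat_completion` and `_get_params` from the file above; the call chain is lifted verbatim from that code, while the PAT, prompt bytes, and sampling values are placeholders:

```python
# Sketch of the raw Clarifai call the adapter wraps. The
# client.app.Model(...).predict_by_bytes(...) chain is taken directly from the
# adapter code above; the PAT, prompt, and inference_params here are placeholders.
from clarifai import client

model = client.app.Model(
    url="https://clarifai.com/meta/Llama-3/models/llama-3_1-8b-instruct",
    pat="<CLARIFAI_PAT>",
)
result = model.predict_by_bytes(
    b"<formatted Llama prompt bytes>",  # built by _get_params() via chat_completion_request_to_prompt
    input_type="text",
    inference_params={"temperature": 0.7, "max_tokens": 256},
)
# Raw completion text; the deleted adapter read this field directly, the new one
# hands the whole response to process_chat_completion_response().
print(result.outputs[0].data.text.raw)
```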
@@ -4,11 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Optional
+from typing import Any, Dict

-from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field

+from llama_stack.schema_utils import json_schema_type
+

 @json_schema_type
 class ClarifaiImplConfig(BaseModel):
@@ -16,3 +17,9 @@ class ClarifaiImplConfig(BaseModel):
         default=None,
         description="The Clarifai Personal Access Token (PAT) to use for authentication.",
     )
+
+    @classmethod
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
+        return {
+            "PAT": "${env.CLARIFAI_PAT}",
+        }
llama_stack/providers/remote/inference/clarifai/models.py (new file, 33 lines)
@@ -0,0 +1,33 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
    build_hf_repo_model_entry,
)

MODEL_ENTRIES = [
    build_hf_repo_model_entry(
        "meta/Llama-3/Llama-3-8B-Instruct",
        CoreModelId.llama3_8b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3-70B-Instruct",
        CoreModelId.llama3_70b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3_1-8b-instruct",
        CoreModelId.llama3_1_8b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3_2-3b-instruct",
        CoreModelId.llama3_2_3b_instruct.value,
    ),
    build_hf_repo_model_entry(
        "meta/Llama-3/llama-3_3-70b-instruct",
        CoreModelId.llama3_3_70b_instruct.value,
    ),
]
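These entries map Clarifai's native model IDs onto the stack's Hugging Face-style aliases, which is why both spellings appear in the generated run.yaml files below. A short sketch of how the adapter's `ModelRegistryHelper` base class consumes them; `get_provider_model_id` is assumed from how the helper is used elsewhere in the codebase, not from this diff:

```python
# Sketch: alias resolution through ModelRegistryHelper, mirroring how the
# adapter initializes it; get_provider_model_id() is an assumed helper method.
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

from llama_stack.providers.remote.inference.clarifai.models import MODEL_ENTRIES

helper = ModelRegistryHelper(MODEL_ENTRIES)
# The HF-style alias and the Clarifai-native ID should resolve to the same entry:
print(helper.get_provider_model_id("meta-llama/Llama-3.1-8B-Instruct"))
# expected: "meta/Llama-3/llama-3_1-8b-instruct"
```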
llama_stack/templates/clarifai/__init__.py (new file, 7 lines)
@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .clarifai import get_distribution_template  # noqa: F401
llama_stack/templates/clarifai/build.yaml (new file, 30 lines)
@@ -0,0 +1,30 @@
version: '2'
distribution_spec:
  description: Use Clarifai for running LLM inference
  providers:
    inference:
    - remote::clarifai
    vector_io:
    - inline::faiss
    safety:
    - inline::llama-guard
    agents:
    - inline::meta-reference
    telemetry:
    - inline::meta-reference
    eval:
    - inline::meta-reference
    datasetio:
    - remote::huggingface
    - inline::localfs
    scoring:
    - inline::basic
    - inline::llm-as-judge
    - inline::braintrust
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
image_type: conda
llama_stack/templates/clarifai/clarifai.py (new file, 160 lines)
@@ -0,0 +1,160 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from pathlib import Path

from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
    ModelInput,
    Provider,
    ShieldInput,
    ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.clarifai import ClarifaiImplConfig
from llama_stack.providers.remote.inference.clarifai.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


def get_distribution_template() -> DistributionTemplate:
    providers = {
        "inference": ["remote::clarifai"],
        "vector_io": ["inline::faiss"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
    }
    name = "clarifai"
    inference_provider = Provider(
        provider_id="clarifai",
        provider_type="remote::clarifai",
        config=ClarifaiImplConfig.sample_run_config(),
    )
    vector_io_provider = Provider(
        provider_id="faiss",
        provider_type="inline::faiss",
        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
    )
    embedding_provider = Provider(
        provider_id="sentence-transformers",
        provider_type="inline::sentence-transformers",
        config=SentenceTransformersInferenceConfig.sample_run_config(),
    )
    available_models = {
        "clarifai": MODEL_ENTRIES,
    }
    default_models = get_model_registry(available_models)
    default_tool_groups = [
        ToolGroupInput(
            toolgroup_id="builtin::websearch",
            provider_id="tavily-search",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::code_interpreter",
            provider_id="code-interpreter",
        ),
    ]
    embedding_model = ModelInput(
        model_id="all-MiniLM-L6-v2",
        provider_id="sentence-transformers",
        model_type=ModelType.embedding,
        metadata={
            "embedding_dimension": 384,
        },
    )

    return DistributionTemplate(
        name=name,
        distro_type="remote_hosted",
        description="Use Clarifai for running LLM inference",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        available_models_by_provider=available_models,
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider, embedding_provider],
                    "vector_io": [vector_io_provider],
                },
                default_models=default_models + [embedding_model],
                default_tool_groups=default_tool_groups,
                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
            ),
            "run-with-safety.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [
                        inference_provider,
                        embedding_provider,
                    ],
                    "vector_io": [vector_io_provider],
                    "safety": [
                        Provider(
                            provider_id="llama-guard",
                            provider_type="inline::llama-guard",
                            config={},
                        ),
                        Provider(
                            provider_id="llama-guard-vision",
                            provider_type="inline::llama-guard",
                            config={},
                        ),
                        Provider(
                            provider_id="code-scanner",
                            provider_type="inline::code-scanner",
                            config={},
                        ),
                    ],
                },
                default_models=[
                    *default_models,
                    embedding_model,
                ],
                default_shields=[
                    ShieldInput(
                        shield_id="meta-llama/Llama-Guard-3-8B",
                        provider_id="llama-guard",
                    ),
                    ShieldInput(
                        shield_id="meta-llama/Llama-Guard-3-11B-Vision",
                        provider_id="llama-guard-vision",
                    ),
                    ShieldInput(
                        shield_id="CodeScanner",
                        provider_id="code-scanner",
                    ),
                ],
                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
                "5001",
                "Port for the Llama Stack distribution server",
            ),
            "CLARIFAI_PAT": (
                "",
                "Clarifai PAT",
            ),
        },
    )
llama_stack/templates/clarifai/doc_template.md (new file, 68 lines)
@@ -0,0 +1,68 @@
---
orphan: true
---
# Clarifai Distribution

```{toctree}
:maxdepth: 2
:hidden:

self
```

The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.

{{ providers_table }}

{% if run_config_env_vars %}
### Environment Variables

The following environment variables can be configured:

{% for var, (default_value, description) in run_config_env_vars.items() %}
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
{% endfor %}
{% endif %}

{% if default_models %}
### Models

The following models are available by default:

{% for model in default_models %}
- `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %}
{% endif %}

### Prerequisite: PAT

Make sure you have access to a Clarifai Personal Access Token (PAT). You can get one by visiting [Clarifai](https://www.clarifai.com/).

## Running Llama Stack with Clarifai

You can do this via Conda (build the code) or Docker, which has a pre-built image.

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```

### Via Conda

```bash
llama stack build --template clarifai --image-type conda
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env CLARIFAI_PAT=$CLARIFAI_PAT
```
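Since the two committed docs above are generated from this template, the rendering step is a plain Jinja pass. A minimal illustration with `jinja2` follows; the context values are lifted from `run_config_env_vars` in `clarifai.py` above, and the real `distro_codegen.py` invocation may pass more context than shown:

```python
# Minimal illustration of rendering doc_template.md; the actual distro_codegen.py
# pipeline may differ, this only shows how the {{ name }} placeholders resolve.
from jinja2 import Template

with open("llama_stack/templates/clarifai/doc_template.md") as f:
    template = Template(f.read())

print(template.render(
    name="clarifai",
    providers_table="| API | Provider(s) |\n|-----|-------------|\n| inference | `remote::clarifai` |",
    run_config_env_vars={
        "LLAMA_STACK_PORT": ("5001", "Port for the Llama Stack distribution server"),
        "CLARIFAI_PAT": ("", "Clarifai PAT"),
    },
    default_models=[],  # empty list, so the Models section is skipped in this sketch
))
```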
llama_stack/templates/clarifai/run-with-safety.yaml (new file, 175 lines)
@@ -0,0 +1,175 @@
version: '2'
image_name: clarifai
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: clarifai
    provider_type: remote::clarifai
    config:
      PAT: ${env.CLARIFAI_PAT}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  - provider_id: llama-guard-vision
    provider_type: inline::llama-guard
    config: {}
  - provider_id: code-scanner
    provider_type: inline::code-scanner
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/clarifai/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config: {}
  - provider_id: localfs
    provider_type: inline::localfs
    config: {}
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/registry.db
models:
- metadata: {}
  model_id: meta/Llama-3/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_1-8b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_2-3b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.2-3B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_3-70b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
  provider_id: llama-guard
- shield_id: meta-llama/Llama-Guard-3-11B-Vision
  provider_id: llama-guard-vision
- shield_id: CodeScanner
  provider_id: code-scanner
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
server:
  port: 8321
llama_stack/templates/clarifai/run.yaml (new file, 164 lines)
@@ -0,0 +1,164 @@
version: '2'
image_name: clarifai
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: clarifai
    provider_type: remote::clarifai
    config:
      PAT: ${env.CLARIFAI_PAT}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/clarifai/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config: {}
  - provider_id: localfs
    provider_type: inline::localfs
    config: {}
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/clarifai}/registry.db
models:
- metadata: {}
  model_id: meta/Llama-3/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/Llama-3-8B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3-70B-Instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_1-8b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_2-3b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.2-3B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta/Llama-3/llama-3_3-70b-instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.3-70B-Instruct
  provider_id: clarifai
  provider_model_id: meta/Llama-3/llama-3_3-70b-instruct
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
server:
  port: 8321
@@ -1,10 +0,0 @@
name: local-clarifai
distribution_spec:
  description: Use Clarifai for running LLM inference
  providers:
    inference: remote::clarifai
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda