From 9120e07d9de5a4101f304b380631ab5e1fdc0457 Mon Sep 17 00:00:00 2001
From: Daniel J Walsh
Date: Tue, 11 Feb 2025 13:47:13 -0500
Subject: [PATCH] Add support for RamaLama

RamaLama is a fully open source AI model tool that facilitates local
management of AI models.

https://github.com/containers/ramalama

It supports pulling models from HuggingFace, Ollama, and OCI images, as
well as via the URI schemes file://, http://, and https://. It uses the
llama.cpp and vllm AI engines for running models, and it defaults to
running models inside of containers.

Signed-off-by: Daniel J Walsh
---
 llama_stack/templates/ramalama/__init__.py  |   7 +
 llama_stack/templates/ramalama/build.yaml   |  31 +++
 .../templates/ramalama/doc_template.md      | 182 ++++++++++++++++++
 llama_stack/templates/ramalama/ramalama.py  | 162 ++++++++++++++++
 llama_stack/templates/ramalama/report.md    |  44 +++++
 .../templates/ramalama/run-with-safety.yaml | 125 ++++++++++++
 llama_stack/templates/ramalama/run.yaml     | 114 +++++++++++
 7 files changed, 665 insertions(+)
 create mode 100644 llama_stack/templates/ramalama/__init__.py
 create mode 100644 llama_stack/templates/ramalama/build.yaml
 create mode 100644 llama_stack/templates/ramalama/doc_template.md
 create mode 100644 llama_stack/templates/ramalama/ramalama.py
 create mode 100644 llama_stack/templates/ramalama/report.md
 create mode 100644 llama_stack/templates/ramalama/run-with-safety.yaml
 create mode 100644 llama_stack/templates/ramalama/run.yaml

diff --git a/llama_stack/templates/ramalama/__init__.py b/llama_stack/templates/ramalama/__init__.py
new file mode 100644
index 000000000..cdb8595fa
--- /dev/null
+++ b/llama_stack/templates/ramalama/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .ramalama import get_distribution_template  # noqa: F401
diff --git a/llama_stack/templates/ramalama/build.yaml b/llama_stack/templates/ramalama/build.yaml
new file mode 100644
index 000000000..f6f7fcf4f
--- /dev/null
+++ b/llama_stack/templates/ramalama/build.yaml
@@ -0,0 +1,31 @@
+version: '2'
+distribution_spec:
+  description: Use (an external) RamaLama server for running LLM inference
+  providers:
+    inference:
+    - remote::ramalama
+    vector_io:
+    - inline::faiss
+    - remote::chromadb
+    - remote::pgvector
+    safety:
+    - inline::llama-guard
+    agents:
+    - inline::meta-reference
+    telemetry:
+    - inline::meta-reference
+    eval:
+    - inline::meta-reference
+    datasetio:
+    - remote::huggingface
+    - inline::localfs
+    scoring:
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::rag-runtime
+image_type: conda
diff --git a/llama_stack/templates/ramalama/doc_template.md b/llama_stack/templates/ramalama/doc_template.md
new file mode 100644
index 000000000..e1b5e0729
--- /dev/null
+++ b/llama_stack/templates/ramalama/doc_template.md
@@ -0,0 +1,182 @@
+---
+orphan: true
+---
+# RamaLama Distribution
+
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
+The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
+
+{{ providers_table }}
+
+You should use this distribution if you have a regular desktop machine without very powerful GPUs. If you do have powerful GPUs, you can still use this distribution, since RamaLama supports GPU acceleration.
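+
+If you are unsure whether RamaLama can use your GPU, a quick check (a sketch assuming a recent RamaLama CLI; the exact output fields may vary) is:
+
+```bash
+# Print RamaLama's view of the host, including the detected engine and any accelerators
+ramalama info
+```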
+
+{% if run_config_env_vars %}
+### Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, (default_value, description) in run_config_env_vars.items() %}
+- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
+{% endfor %}
+{% endif %}
+
+
+## Setting up RamaLama server
+
+Please check the [RamaLama Documentation](https://github.com/containers/ramalama) for instructions on how to install and run RamaLama. After installing RamaLama, you need to run `ramalama serve <model>` to start serving a model.
+
+To serve the inference model, run:
+
+```bash
+export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"
+
+# ramalama names this model differently, and we must use the ramalama name when serving the model
+export RAMALAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16"
+ramalama serve $RAMALAMA_INFERENCE_MODEL
+```
+
+Note that, at the time of writing, `ramalama serve` listens on port 8080 by default; pass `--port` if it needs to match the `OLLAMA_URL` value the Llama Stack server is configured with (default `11434`).
+
+If you are using Llama Stack Safety / Shield APIs, you will also need to pull and serve the safety model:
+
+```bash
+export SAFETY_MODEL="meta-llama/Llama-Guard-3-1B"
+
+# ramalama names this model differently, and we must use the ramalama name when serving the model
+export RAMALAMA_SAFETY_MODEL="llama-guard3:1b"
+ramalama serve $RAMALAMA_SAFETY_MODEL
+```
+
+## Running Llama Stack
+
+Now you are ready to run Llama Stack with RamaLama as the inference provider. You can do this via Conda (build the code yourself) or via a container (Podman or Docker) using a pre-built image.
+
+### Via Podman
+
+This method allows you to get started quickly without having to build the distribution code.
+
+```bash
+export LLAMA_STACK_PORT=5001
+podman run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama:z \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=$INFERENCE_MODEL \
+  --env OLLAMA_URL=http://host.containers.internal:11434
+```
+
+If you are using Llama Stack Safety / Shield APIs, use:
+
+```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
+podman run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama:z \
+  -v ./llama_stack/templates/ramalama/run-with-safety.yaml:/root/my-run.yaml:z \
+  llamastack/distribution-{{ name }} \
+  --yaml-config /root/my-run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=$INFERENCE_MODEL \
+  --env SAFETY_MODEL=$SAFETY_MODEL \
+  --env OLLAMA_URL=http://host.containers.internal:11434
+```
+
+### Via Docker
+
+As with Podman, this method allows you to get started quickly without having to build the distribution code.
+
+```bash
+export LLAMA_STACK_PORT=5001
+docker run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=$INFERENCE_MODEL \
+  --env OLLAMA_URL=http://host.docker.internal:11434
+```
+
+If you are using Llama Stack Safety / Shield APIs, use:
+
+```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
+docker run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/ramalama/run-with-safety.yaml:/root/my-run.yaml \
+  llamastack/distribution-{{ name }} \
+  --yaml-config /root/my-run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=$INFERENCE_MODEL \
+  --env SAFETY_MODEL=$SAFETY_MODEL \
+  --env OLLAMA_URL=http://host.docker.internal:11434
+```
+
+### Via Conda
+
+Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
+
+```bash
+export LLAMA_STACK_PORT=5001
+
+llama stack build --template {{ name }} --image-type conda
+llama stack run ./run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=$INFERENCE_MODEL \
+  --env OLLAMA_URL=http://localhost:11434
+```
+
+If you are using Llama Stack Safety / Shield APIs, use:
+
+```bash
+llama stack run ./run-with-safety.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=$INFERENCE_MODEL \
+  --env SAFETY_MODEL=$SAFETY_MODEL \
+  --env OLLAMA_URL=http://localhost:11434
+```
+
+
+### (Optional) Update Model Serving Configuration
+
+```{note}
+Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ramalama/ramalama.py#L45) for the supported RamaLama models.
+```
+
+To serve a new model with `ramalama`, run:
+
+```bash
+ramalama serve <model_name>
+```
+
+To make sure that the model is being served correctly, run `ramalama ps` to list the containers that ramalama has started for serving models.
+
+To verify that the model served by ramalama is correctly connected to the Llama Stack server, run:
+
+```bash
+$ llama-stack-client models list
++----------------------+----------------------+---------------+-------------------------------------------------+
+| identifier           | llama_model          | provider_id   | metadata                                        |
++======================+======================+===============+=================================================+
+| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ramalama      | {'ramalama_model': 'llama3.1:8b-instruct-fp16'} |
++----------------------+----------------------+---------------+-------------------------------------------------+
+```
diff --git a/llama_stack/templates/ramalama/ramalama.py b/llama_stack/templates/ramalama/ramalama.py
new file mode 100644
index 000000000..96e81cf6e
--- /dev/null
+++ b/llama_stack/templates/ramalama/ramalama.py
@@ -0,0 +1,162 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
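+"""Distribution template for the `ramalama` distribution.
+
+Note: this template was adapted from the ollama template, which is why the
+run configurations reuse the OLLAMA_URL environment variable for the URL of
+the RamaLama server.
+"""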
+
+from pathlib import Path
+
+from llama_stack.apis.models.models import ModelType
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
+from llama_stack.providers.inline.inference.sentence_transformers import (
+    SentenceTransformersInferenceConfig,
+)
+from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
+from llama_stack.providers.remote.inference.ramalama import RamaLamaImplConfig
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+
+
+def get_distribution_template() -> DistributionTemplate:
+    providers = {
+        "inference": ["remote::ramalama"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::rag-runtime",
+        ],
+    }
+    name = "ramalama"
+    inference_provider = Provider(
+        provider_id="ramalama",
+        provider_type="remote::ramalama",
+        config=RamaLamaImplConfig.sample_run_config(),
+    )
+    embedding_provider = Provider(
+        provider_id="sentence-transformers",
+        provider_type="inline::sentence-transformers",
+        config=SentenceTransformersInferenceConfig.sample_run_config(),
+    )
+    vector_io_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
+
+    inference_model = ModelInput(
+        model_id="${env.INFERENCE_MODEL}",
+        provider_id="ramalama",
+    )
+    safety_model = ModelInput(
+        model_id="${env.SAFETY_MODEL}",
+        provider_id="ramalama",
+    )
+    embedding_model = ModelInput(
+        model_id="all-MiniLM-L6-v2",
+        provider_id="sentence-transformers",
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 384,
+        },
+    )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::rag",
+            provider_id="rag-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]
+
+    return DistributionTemplate(
+        name=name,
+        distro_type="self_hosted",
+        description="Use (an external) RamaLama server for running LLM inference",
+        container_image=None,
+        template_path=Path(__file__).parent / "doc_template.md",
+        providers=providers,
+        default_models=[inference_model, safety_model],
+        run_configs={
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [inference_provider, embedding_provider],
+                    "vector_io": [vector_io_provider],
+                },
+                default_models=[inference_model, embedding_model],
+                default_tool_groups=default_tool_groups,
+            ),
+            "run-with-safety.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [
+                        inference_provider,
+                        embedding_provider,
+                    ],
+                    "vector_io": [vector_io_provider],
+                    "safety": [
+                        Provider(
+                            provider_id="llama-guard",
+                            provider_type="inline::llama-guard",
+                            config={},
+                        ),
+                        Provider(
+                            provider_id="code-scanner",
+                            provider_type="inline::code-scanner",
+                            config={},
+                        ),
+                    ],
+                },
+                default_models=[
+                    inference_model,
+                    safety_model,
+                    embedding_model,
+                ],
+                default_shields=[
+                    ShieldInput(
+                        shield_id="${env.SAFETY_MODEL}",
+                        provider_id="llama-guard",
+                    ),
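+                    # The CodeScanner shield below complements Llama Guard: it
+                    # scans model-generated code rather than moderating chat content.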
+                    ShieldInput(
+                        shield_id="CodeScanner",
+                        provider_id="code-scanner",
+                    ),
+                ],
+                default_tool_groups=default_tool_groups,
+            ),
+        },
+        run_config_env_vars={
+            "LLAMA_STACK_PORT": (
+                "5001",
+                "Port for the Llama Stack distribution server",
+            ),
+            "OLLAMA_URL": (
+                "http://127.0.0.1:11434",
+                "URL of the RamaLama server",
+            ),
+            "INFERENCE_MODEL": (
+                "meta-llama/Llama-3.2-3B-Instruct",
+                "Inference model loaded into the RamaLama server",
+            ),
+            "SAFETY_MODEL": (
+                "meta-llama/Llama-Guard-3-1B",
+                "Safety model loaded into the RamaLama server",
+            ),
+        },
+    )
diff --git a/llama_stack/templates/ramalama/report.md b/llama_stack/templates/ramalama/report.md
new file mode 100644
index 000000000..ac95d42f2
--- /dev/null
+++ b/llama_stack/templates/ramalama/report.md
@@ -0,0 +1,44 @@
+# Report for ramalama distribution
+
+## Supported Models
+| Model Descriptor | ramalama |
+|:---|:---|
+| Llama-3-8B-Instruct | ❌ |
+| Llama-3-70B-Instruct | ❌ |
+| Llama3.1-8B-Instruct | ✅ |
+| Llama3.1-70B-Instruct | ✅ |
+| Llama3.1-405B-Instruct | ✅ |
+| Llama3.2-1B-Instruct | ✅ |
+| Llama3.2-3B-Instruct | ✅ |
+| Llama3.2-11B-Vision-Instruct | ✅ |
+| Llama3.2-90B-Vision-Instruct | ✅ |
+| Llama3.3-70B-Instruct | ✅ |
+| Llama-Guard-3-11B-Vision | ❌ |
+| Llama-Guard-3-1B | ✅ |
+| Llama-Guard-3-8B | ✅ |
+| Llama-Guard-2-8B | ❌ |
+
+## Inference
+| Model | API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|:-----|
+| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
+| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ |
+| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ❌ |
+| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
+| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
+| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
+| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ |
+| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
+| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ |
+
+## Vector IO
+| API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|
+| /retrieve | | test_vector_db_retrieve | ✅ |
+
+## Agents
+| API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|
+| /create_agent_turn | rag | test_rag_agent | ✅ |
+| /create_agent_turn | custom_tool | test_custom_tool | ✅ |
+| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |
diff --git a/llama_stack/templates/ramalama/run-with-safety.yaml b/llama_stack/templates/ramalama/run-with-safety.yaml
new file mode 100644
index 000000000..043d43c64
--- /dev/null
+++ b/llama_stack/templates/ramalama/run-with-safety.yaml
@@ -0,0 +1,125 @@
+version: '2'
+image_name: ramalama
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ramalama
+    provider_type: remote::ramalama
+    config:
+      url: ${env.OLLAMA_URL:http://localhost:11434}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
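+        # faiss persists its index metadata in this sqlite-backed kvstore;
+        # set SQLITE_STORE_DIR to relocate it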
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama/trace_store.db}
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config: {}
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config: {}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: ramalama
+  model_type: llm
+- metadata: {}
+  model_id: ${env.SAFETY_MODEL}
+  provider_id: ramalama
+  model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
+shields:
+- shield_id: ${env.SAFETY_MODEL}
+  provider_id: llama-guard
+- shield_id: CodeScanner
+  provider_id: code-scanner
+vector_dbs: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
+server:
+  port: 8321
diff --git a/llama_stack/templates/ramalama/run.yaml b/llama_stack/templates/ramalama/run.yaml
new file mode 100644
index 000000000..5013b256c
--- /dev/null
+++ b/llama_stack/templates/ramalama/run.yaml
@@ -0,0 +1,114 @@
+version: '2'
+image_name: ramalama
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ramalama
+    provider_type: remote::ramalama
+    config:
+      url: ${env.OLLAMA_URL:http://localhost:11434}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
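+  # the meta-reference agents provider persists agent sessions in a local sqlite store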
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama/trace_store.db}
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config: {}
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config: {}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: ramalama
+  model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
+shields: []
+vector_dbs: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
+server:
+  port: 8321