mirror of https://github.com/meta-llama/llama-stack.git

commit 171a178783 (parent ea50086190)

get ollama working

9 changed files with 151 additions and 375 deletions
@@ -10,6 +10,7 @@ set -euo pipefail
 
 # Define color codes
 RED='\033[0;31m'
+GREEN='\033[0;32m'
 NC='\033[0m' # No Color
 
 error_handler() {
@@ -78,6 +79,8 @@ pip_dependencies="$3"
 
 ensure_conda_env_python310 "$env_name" "$pip_dependencies"
 
+echo -e "${GREEN}Successfully setup distribution environment. Starting to configure ....${NC}"
+
 eval "$(conda shell.bash hook)"
 conda deactivate && conda activate "$env_name"
 
@@ -41,7 +41,10 @@ def model_checkpoint_dir(model) -> str:
     if not Path(checkpoint_dir / "consolidated.00.pth").exists():
         checkpoint_dir = checkpoint_dir / "original"
 
-    assert checkpoint_dir.exists(), f"Could not find checkpoint dir: {checkpoint_dir}"
+    assert checkpoint_dir.exists(), (
+        f"Could not find checkpoint dir: {checkpoint_dir}. "
+        f"Please download model using `llama download {model.descriptor()}`"
+    )
     return str(checkpoint_dir)
 
 
@@ -10,5 +10,7 @@ from strong_typing.schema import json_schema_type
 
 @json_schema_type
 class OllamaImplConfig(BaseModel):
-    model: str = Field(..., description="The name of the model in ollama catalog")
-    url: str = Field(..., description="The URL for the ollama server")
+    url: str = Field(
+        default="http://localhost:11434",
+        description="The URL for the ollama server",
+    )
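With the required `model` field removed and `url` given a default, a bare `OllamaImplConfig()` now points at a local server. A minimal sketch of the resulting behavior (not part of the commit; the override address is hypothetical):

    from pydantic import BaseModel, Field

    class OllamaImplConfig(BaseModel):
        url: str = Field(
            default="http://localhost:11434",
            description="The URL for the ollama server",
        )

    config = OllamaImplConfig()  # picks up the default local URL
    assert config.url == "http://localhost:11434"
    remote = OllamaImplConfig(url="http://10.0.0.5:11434")  # hypothetical per-deployment override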
@@ -5,10 +5,10 @@
 # the root directory of this source tree.
 
 import uuid
-from typing import AsyncGenerator
+from typing import AsyncGenerator, Dict
 
 import httpx
 
 from llama_models.llama3_1.api.datatypes import (
     BuiltinTool,
     CompletionMessage,
@@ -17,11 +17,8 @@ from llama_models.llama3_1.api.datatypes import (
     ToolCall,
 )
 from llama_models.llama3_1.api.tool_utils import ToolUtils
-
 from llama_models.sku_list import resolve_model
-
-from ollama import AsyncClient
-
+from llama_toolchain.distribution.datatypes import Api, ProviderSpec
 from llama_toolchain.inference.api import (
     ChatCompletionRequest,
     ChatCompletionResponse,
@@ -33,18 +30,21 @@ from llama_toolchain.inference.api import (
     ToolCallDelta,
     ToolCallParseStatus,
 )
+from ollama import AsyncClient
 
 from .config import OllamaImplConfig
 
 # TODO: Eventually this will move to the llama cli model list command
 # mapping of Model SKUs to ollama models
 OLLAMA_SUPPORTED_SKUS = {
-    "Meta-Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16"
-    # TODO: Add other variants for llama3.1
+    "Meta-Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16",
+    "Meta-Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16",
 }
 
 
-async def get_provider_impl(config: OllamaImplConfig) -> Inference:
+async def get_provider_impl(
+    config: OllamaImplConfig, _deps: Dict[Api, ProviderSpec]
+) -> Inference:
     assert isinstance(
         config, OllamaImplConfig
     ), f"Unexpected config type: {type(config)}"
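The SKU map above is what `resolve_ollama_model` consumes. Its body is not shown in this diff; a plausible sketch of the lookup, with the error message being an assumption:

    OLLAMA_SUPPORTED_SKUS = {
        "Meta-Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16",
        "Meta-Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16",
    }

    def resolve_ollama_model(model_sku: str) -> str:
        # map a Llama model SKU to the tag ollama serves it under
        assert model_sku in OLLAMA_SUPPORTED_SKUS, f"Unsupported model: {model_sku}"
        return OLLAMA_SUPPORTED_SKUS[model_sku]

    print(resolve_ollama_model("Meta-Llama3.1-8B-Instruct"))  # llama3.1:8b-instruct-fp16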
@@ -57,15 +57,14 @@ class OllamaInference(Inference):
 
     def __init__(self, config: OllamaImplConfig) -> None:
         self.config = config
-        self.model = config.model
 
+    @property
+    def client(self) -> AsyncClient:
+        return AsyncClient(host=self.config.url)
+
     async def initialize(self) -> None:
-        self.client = AsyncClient(host=self.config.url)
         try:
-            status = await self.client.pull(self.model)
-            assert (
-                status["status"] == "success"
-            ), f"Failed to pull model {self.model} in ollama"
+            await self.client.ps()
         except httpx.ConnectError:
             print(
                 "Ollama Server is not running, start it using `ollama serve` in a separate terminal"
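Making `client` a property instead of caching it in `initialize()` means every access constructs a fresh `AsyncClient`, so the provider never holds a client bound to a stale event loop, and `initialize()` shrinks to a reachability check. A minimal sketch of the pattern in isolation (class name is illustrative):

    from ollama import AsyncClient

    class OllamaClientHolder:
        def __init__(self, url: str) -> None:
            self.url = url  # store only the URL; no client is cached

        @property
        def client(self) -> AsyncClient:
            # built on demand, never tied to whichever loop ran initialize()
            return AsyncClient(host=self.url)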
@@ -81,7 +80,11 @@ class OllamaInference(Inference):
     def _messages_to_ollama_messages(self, messages: list[Message]) -> list:
         ollama_messages = []
         for message in messages:
-            ollama_messages.append({"role": message.role, "content": message.content})
+            if message.role == "ipython":
+                role = "tool"
+            else:
+                role = message.role
+            ollama_messages.append({"role": role, "content": message.content})
 
         return ollama_messages
 
@@ -112,6 +115,21 @@ class OllamaInference(Inference):
         # accumulate sampling params and other options to pass to ollama
         options = self.get_ollama_chat_options(request)
         ollama_model = self.resolve_ollama_model(request.model)
+
+        res = await self.client.ps()
+        need_model_pull = True
+        for r in res["models"]:
+            if ollama_model == r["model"]:
+                need_model_pull = False
+                break
+
+        if need_model_pull:
+            print(f"Pulling model: {ollama_model}")
+            status = await self.client.pull(ollama_model)
+            assert (
+                status["status"] == "success"
+            ), f"Failed to pull model {ollama_model} in ollama"
+
         if not request.stream:
             r = await self.client.chat(
                 model=ollama_model,
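Model pulling now happens lazily at chat time: `ps()` lists the models the server currently has loaded, and `pull()` runs only when the requested tag is missing. A standalone sketch of that check, assuming the dict-shaped `ps()`/`pull()` responses the diff relies on and a reachable local server:

    import asyncio

    from ollama import AsyncClient

    async def ensure_model(ollama_model: str, url: str = "http://localhost:11434") -> None:
        client = AsyncClient(host=url)
        res = await client.ps()
        if any(r["model"] == ollama_model for r in res["models"]):
            return  # already loaded; nothing to pull
        print(f"Pulling model: {ollama_model}")
        status = await client.pull(ollama_model)
        assert status["status"] == "success", f"Failed to pull model {ollama_model} in ollama"

    asyncio.run(ensure_model("llama3.1:8b-instruct-fp16"))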
@@ -141,7 +159,6 @@ class OllamaInference(Inference):
                     delta="",
                 )
             )
-
         stream = await self.client.chat(
             model=ollama_model,
             messages=self._messages_to_ollama_messages(request.messages),
@@ -154,11 +171,10 @@ class OllamaInference(Inference):
         stop_reason = None
 
         async for chunk in stream:
             # check if ollama is done
             if chunk["done"]:
-                if chunk["done_reason"] == "stop":
+                if stop_reason is None and chunk["done_reason"] == "stop":
                     stop_reason = StopReason.end_of_turn
-                elif chunk["done_reason"] == "length":
+                elif stop_reason is None and chunk["done_reason"] == "length":
                     stop_reason = StopReason.out_of_tokens
                 break
-
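The added `stop_reason is None` guards keep the final `done` chunk from clobbering a stop reason set earlier in the stream (for example `end_of_message` when an eom token was already seen). A small sketch of the precedence rule, with a stand-in enum:

    from enum import Enum
    from typing import Optional

    class StopReason(Enum):  # stand-in for the real StopReason
        end_of_turn = "end_of_turn"
        end_of_message = "end_of_message"
        out_of_tokens = "out_of_tokens"

    def final_stop_reason(current: Optional[StopReason], done_reason: str) -> Optional[StopReason]:
        # a reason set earlier in the stream wins over ollama's done_reason
        if current is not None:
            return current
        return {
            "stop": StopReason.end_of_turn,
            "length": StopReason.out_of_tokens,
        }.get(done_reason)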
@@ -176,7 +192,7 @@ class OllamaInference(Inference):
                         ),
                     )
                 )
-                buffer = buffer[len("<|python_tag|>") :]
+                buffer += text
                 continue
 
             if ipython:
@@ -214,7 +230,6 @@ class OllamaInference(Inference):
 
         # parse tool calls and report errors
         message = decode_assistant_message_from_content(buffer, stop_reason)
-
         parsed_tool_calls = len(message.tool_calls) > 0
         if ipython and not parsed_tool_calls:
             yield ChatCompletionResponseStreamChunk(
@@ -10,14 +10,14 @@ from pydantic import BaseModel
 
 
 class LlamaGuardShieldConfig(BaseModel):
-    model: str
-    excluded_categories: List[str]
+    model: str = "Llama-Guard-3-8B"
+    excluded_categories: List[str] = []
     disable_input_check: bool = False
     disable_output_check: bool = False
 
 
 class PromptGuardShieldConfig(BaseModel):
-    model: str
+    model: str = "Prompt-Guard-86M"
 
 
 class SafetyConfig(BaseModel):
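With defaults on every field, both shield configs can now be instantiated with no arguments. A sketch of the effect (pydantic deep-copies field defaults, so the mutable `[]` default does not leak between instances; the appended category string is illustrative):

    from typing import List

    from pydantic import BaseModel

    class LlamaGuardShieldConfig(BaseModel):
        model: str = "Llama-Guard-3-8B"
        excluded_categories: List[str] = []
        disable_input_check: bool = False
        disable_output_check: bool = False

    cfg = LlamaGuardShieldConfig()          # valid with no arguments
    cfg.excluded_categories.append("S1")    # local to this instance only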
@@ -1,340 +0,0 @@
-#!/bin/sh
-# This script installs Ollama on Linux.
-# It detects the current operating system architecture and installs the appropriate version of Ollama.
-
-set -eu
-
-status() { echo ">>> $*" >&2; }
-error() { echo "ERROR $*"; exit 1; }
-warning() { echo "WARNING: $*"; }
-
-TEMP_DIR=$(mktemp -d)
-cleanup() { rm -rf $TEMP_DIR; }
-trap cleanup EXIT
-
-available() { command -v $1 >/dev/null; }
-require() {
-    local MISSING=''
-    for TOOL in $*; do
-        if ! available $TOOL; then
-            MISSING="$MISSING $TOOL"
-        fi
-    done
-
-    echo $MISSING
-}
-
-[ "$(uname -s)" = "Linux" ] || error 'This script is intended to run on Linux only.'
-
-ARCH=$(uname -m)
-case "$ARCH" in
-    x86_64) ARCH="amd64" ;;
-    aarch64|arm64) ARCH="arm64" ;;
-    *) error "Unsupported architecture: $ARCH" ;;
-esac
-
-IS_WSL2=false
-
-KERN=$(uname -r)
-case "$KERN" in
-    *icrosoft*WSL2 | *icrosoft*wsl2) IS_WSL2=true;;
-    *icrosoft) error "Microsoft WSL1 is not currently supported. Please upgrade to WSL2 with 'wsl --set-version <distro> 2'" ;;
-    *) ;;
-esac
-
-VER_PARAM="${OLLAMA_VERSION:+?version=$OLLAMA_VERSION}"
-
-SUDO=
-if [ "$(id -u)" -ne 0 ]; then
-    # Running as root, no need for sudo
-    if ! available sudo; then
-        error "This script requires superuser permissions. Please re-run as root."
-    fi
-
-    SUDO="sudo"
-fi
-
-NEEDS=$(require curl awk grep sed tee xargs)
-if [ -n "$NEEDS" ]; then
-    status "ERROR: The following tools are required but missing:"
-    for NEED in $NEEDS; do
-        echo " - $NEED"
-    done
-    exit 1
-fi
-
-status "Downloading ollama..."
-curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.com/download/ollama-linux-${ARCH}${VER_PARAM}"
-
-for BINDIR in /usr/local/bin /usr/bin /bin; do
-    echo $PATH | grep -q $BINDIR && break || continue
-done
-
-status "Installing ollama to $BINDIR..."
-$SUDO install -o0 -g0 -m755 -d $BINDIR
-$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama
-
-install_success() {
-    status 'The Ollama API is now available at 127.0.0.1:11434.'
-    status 'Install complete. Run "ollama" from the command line.'
-}
-trap install_success EXIT
-
-# Everything from this point onwards is optional.
-
-configure_systemd() {
-    if ! id ollama >/dev/null 2>&1; then
-        status "Creating ollama user..."
-        $SUDO useradd -r -s /bin/false -U -m -d /usr/share/ollama ollama
-    fi
-    if getent group render >/dev/null 2>&1; then
-        status "Adding ollama user to render group..."
-        $SUDO usermod -a -G render ollama
-    fi
-    if getent group video >/dev/null 2>&1; then
-        status "Adding ollama user to video group..."
-        $SUDO usermod -a -G video ollama
-    fi
-
-    status "Adding current user to ollama group..."
-    $SUDO usermod -a -G ollama $(whoami)
-
-    status "Creating ollama systemd service..."
-    cat <<EOF | $SUDO tee /etc/systemd/system/ollama.service >/dev/null
-[Unit]
-Description=Ollama Service
-After=network-online.target
-
-[Service]
-ExecStart=$BINDIR/ollama serve
-User=ollama
-Group=ollama
-Restart=always
-RestartSec=3
-Environment="PATH=$PATH"
-
-[Install]
-WantedBy=default.target
-EOF
-    SYSTEMCTL_RUNNING="$(systemctl is-system-running || true)"
-    case $SYSTEMCTL_RUNNING in
-        running|degraded)
-            status "Enabling and starting ollama service..."
-            $SUDO systemctl daemon-reload
-            $SUDO systemctl enable ollama
-
-            start_service() { $SUDO systemctl restart ollama; }
-            trap start_service EXIT
-            ;;
-    esac
-}
-
-if available systemctl; then
-    configure_systemd
-fi
-
-# WSL2 only supports GPUs via nvidia passthrough
-# so check for nvidia-smi to determine if GPU is available
-if [ "$IS_WSL2" = true ]; then
-    if available nvidia-smi && [ -n "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then
-        status "Nvidia GPU detected."
-    fi
-    install_success
-    exit 0
-fi
-
-# Install GPU dependencies on Linux
-if ! available lspci && ! available lshw; then
-    warning "Unable to detect NVIDIA/AMD GPU. Install lspci or lshw to automatically detect and install GPU dependencies."
-    exit 0
-fi
-
-check_gpu() {
-    # Look for devices based on vendor ID for NVIDIA and AMD
-    case $1 in
-        lspci)
-            case $2 in
-                nvidia) available lspci && lspci -d '10de:' | grep -q 'NVIDIA' || return 1 ;;
-                amdgpu) available lspci && lspci -d '1002:' | grep -q 'AMD' || return 1 ;;
-            esac ;;
-        lshw)
-            case $2 in
-                nvidia) available lshw && $SUDO lshw -c display -numeric -disable network | grep -q 'vendor: .* \[10DE\]' || return 1 ;;
-                amdgpu) available lshw && $SUDO lshw -c display -numeric -disable network | grep -q 'vendor: .* \[1002\]' || return 1 ;;
-            esac ;;
-        nvidia-smi) available nvidia-smi || return 1 ;;
-    esac
-}
-
-if check_gpu nvidia-smi; then
-    status "NVIDIA GPU installed."
-    exit 0
-fi
-
-if ! check_gpu lspci nvidia && ! check_gpu lshw nvidia && ! check_gpu lspci amdgpu && ! check_gpu lshw amdgpu; then
-    install_success
-    warning "No NVIDIA/AMD GPU detected. Ollama will run in CPU-only mode."
-    exit 0
-fi
-
-if check_gpu lspci amdgpu || check_gpu lshw amdgpu; then
-    # Look for pre-existing ROCm v6 before downloading the dependencies
-    for search in "${HIP_PATH:-''}" "${ROCM_PATH:-''}" "/opt/rocm" "/usr/lib64"; do
-        if [ -n "${search}" ] && [ -e "${search}/libhipblas.so.2" -o -e "${search}/lib/libhipblas.so.2" ]; then
-            status "Compatible AMD GPU ROCm library detected at ${search}"
-            install_success
-            exit 0
-        fi
-    done
-
-    status "Downloading AMD GPU dependencies..."
-    $SUDO rm -rf /usr/share/ollama/lib
-    $SUDO chmod o+x /usr/share/ollama
-    $SUDO install -o ollama -g ollama -m 755 -d /usr/share/ollama/lib/rocm
-    curl --fail --show-error --location --progress-bar "https://ollama.com/download/ollama-linux-amd64-rocm.tgz${VER_PARAM}" \
-        | $SUDO tar zx --owner ollama --group ollama -C /usr/share/ollama/lib/rocm .
-    install_success
-    status "AMD GPU ready."
-    exit 0
-fi
-
-CUDA_REPO_ERR_MSG="NVIDIA GPU detected, but your OS and Architecture are not supported by NVIDIA. Please install the CUDA driver manually https://docs.nvidia.com/cuda/cuda-installation-guide-linux/"
-# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-7-centos-7
-# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-8-rocky-8
-# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-9-rocky-9
-# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#fedora
-install_cuda_driver_yum() {
-    status 'Installing NVIDIA repository...'
-
-    case $PACKAGE_MANAGER in
-        yum)
-            $SUDO $PACKAGE_MANAGER -y install yum-utils
-            if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo" >/dev/null ; then
-                $SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
-            else
-                error $CUDA_REPO_ERR_MSG
-            fi
-            ;;
-        dnf)
-            if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo" >/dev/null ; then
-                $SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
-            else
-                error $CUDA_REPO_ERR_MSG
-            fi
-            ;;
-    esac
-
-    case $1 in
-        rhel)
-            status 'Installing EPEL repository...'
-            # EPEL is required for third-party dependencies such as dkms and libvdpau
-            $SUDO $PACKAGE_MANAGER -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-$2.noarch.rpm || true
-            ;;
-    esac
-
-    status 'Installing CUDA driver...'
-
-    if [ "$1" = 'centos' ] || [ "$1$2" = 'rhel7' ]; then
-        $SUDO $PACKAGE_MANAGER -y install nvidia-driver-latest-dkms
-    fi
-
-    $SUDO $PACKAGE_MANAGER -y install cuda-drivers
-}
-
-# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu
-# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian
-install_cuda_driver_apt() {
-    status 'Installing NVIDIA repository...'
-    if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb" >/dev/null ; then
-        curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb
-    else
-        error $CUDA_REPO_ERR_MSG
-    fi
-
-    case $1 in
-        debian)
-            status 'Enabling contrib sources...'
-            $SUDO sed 's/main/contrib/' < /etc/apt/sources.list | $SUDO tee /etc/apt/sources.list.d/contrib.list > /dev/null
-            if [ -f "/etc/apt/sources.list.d/debian.sources" ]; then
-                $SUDO sed 's/main/contrib/' < /etc/apt/sources.list.d/debian.sources | $SUDO tee /etc/apt/sources.list.d/contrib.sources > /dev/null
-            fi
-            ;;
-    esac
-
-    status 'Installing CUDA driver...'
-    $SUDO dpkg -i $TEMP_DIR/cuda-keyring.deb
-    $SUDO apt-get update
-
-    [ -n "$SUDO" ] && SUDO_E="$SUDO -E" || SUDO_E=
-    DEBIAN_FRONTEND=noninteractive $SUDO_E apt-get -y install cuda-drivers -q
-}
-
-if [ ! -f "/etc/os-release" ]; then
-    error "Unknown distribution. Skipping CUDA installation."
-fi
-
-. /etc/os-release
-
-OS_NAME=$ID
-OS_VERSION=$VERSION_ID
-
-PACKAGE_MANAGER=
-for PACKAGE_MANAGER in dnf yum apt-get; do
-    if available $PACKAGE_MANAGER; then
-        break
-    fi
-done
-
-if [ -z "$PACKAGE_MANAGER" ]; then
-    error "Unknown package manager. Skipping CUDA installation."
-fi
-
-if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then
-    case $OS_NAME in
-        centos|rhel) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -d '.' -f 1) ;;
-        rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
-        fedora) [ $OS_VERSION -lt '39' ] && install_cuda_driver_yum $OS_NAME $OS_VERSION || install_cuda_driver_yum $OS_NAME '39';;
-        amzn) install_cuda_driver_yum 'fedora' '37' ;;
-        debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;;
-        ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;;
-        *) exit ;;
-    esac
-fi
-
-if ! lsmod | grep -q nvidia || ! lsmod | grep -q nvidia_uvm; then
-    KERNEL_RELEASE="$(uname -r)"
-    case $OS_NAME in
-        rocky) $SUDO $PACKAGE_MANAGER -y install kernel-devel kernel-headers ;;
-        centos|rhel|amzn) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE kernel-headers-$KERNEL_RELEASE ;;
-        fedora) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE ;;
-        debian|ubuntu) $SUDO apt-get -y install linux-headers-$KERNEL_RELEASE ;;
-        *) exit ;;
-    esac
-
-    NVIDIA_CUDA_VERSION=$($SUDO dkms status | awk -F: '/added/ { print $1 }')
-    if [ -n "$NVIDIA_CUDA_VERSION" ]; then
-        $SUDO dkms install $NVIDIA_CUDA_VERSION
-    fi
-
-    if lsmod | grep -q nouveau; then
-        status 'Reboot to complete NVIDIA CUDA driver install.'
-        exit 0
-    fi
-
-    $SUDO modprobe nvidia
-    $SUDO modprobe nvidia_uvm
-fi
-
-# make sure the NVIDIA modules are loaded on boot with nvidia-persistenced
-if command -v nvidia-persistenced > /dev/null 2>&1; then
-    $SUDO touch /etc/modules-load.d/nvidia.conf
-    MODULES="nvidia nvidia-uvm"
-    for MODULE in $MODULES; do
-        if ! grep -qxF "$MODULE" /etc/modules-load.d/nvidia.conf; then
-            echo "$MODULE" | sudo tee -a /etc/modules-load.d/nvidia.conf > /dev/null
-        fi
-    done
-fi
-
-status "NVIDIA GPU ready."
-install_success
@@ -6,6 +6,7 @@ flake8
+httpx
 huggingface-hub
 json-strong-typing
 llama-models
 omegaconf
 pre-commit
 pydantic==1.10.13
@@ -13,6 +13,7 @@ from llama_models.llama3_1.api.datatypes import (
     UserMessage,
     StopReason,
     SystemMessage,
+    ToolResponseMessage,
 )
 from llama_toolchain.inference.api.datatypes import (
     ChatCompletionResponseEventType,
@@ -256,3 +257,33 @@ class InferenceTests(unittest.IsolatedAsyncioTestCase):
         )
         self.assertEqual(events[-2].stop_reason, StopReason.end_of_turn)
         self.assertEqual(events[-2].delta.content.tool_name, "get_boiling_point")
+
+    async def test_multi_turn(self):
+        request = ChatCompletionRequest(
+            model=self.valid_supported_model,
+            messages=[
+                self.system_prompt,
+                UserMessage(
+                    content="Search the web and tell me who the "
+                    "44th president of the United States was",
+                ),
+                ToolResponseMessage(
+                    call_id="1",
+                    tool_name=BuiltinTool.brave_search,
+                    # content='{"query": "44th president of the United States", "top_k": [{"title": "Barack Obama | The White House", "url": "https://www.whitehouse.gov/about-the-white-house/presidents/barack-obama/", "description": "<strong>Barack Obama</strong> served as the 44th President of the United States. His story is the American story \\u2014 values from the heartland, a middle-class upbringing in a strong family, hard work and education as the means of getting ahead, and the conviction that a life so blessed should be lived in service ...", "type": "search_result"}, {"title": "Barack Obama \\u2013 The White House", "url": "https://trumpwhitehouse.archives.gov/about-the-white-house/presidents/barack-obama/", "description": "After working his way through college with the help of scholarships and student loans, <strong>President Obama</strong> moved to Chicago, where he worked with a group of churches to help rebuild communities devastated by the closure of local steel plants.", "type": "search_result"}, [{"type": "video_result", "url": "https://www.instagram.com/reel/CzMZbJmObn9/", "title": "Fifteen years ago, on Nov. 4, Barack Obama was elected as ...", "description": ""}, {"type": "video_result", "url": "https://video.alexanderstreet.com/watch/the-44th-president-barack-obama?context=channel:barack-obama", "title": "The 44th President (Barack Obama) - Alexander Street, a ...", "description": "You need to enable JavaScript to run this app"}, {"type": "video_result", "url": "https://www.youtube.com/watch?v=iyL7_2-em5k", "title": "Barack Obama for Kids | Learn about the life and contributions ...", "description": "Enjoy the videos and music you love, upload original content, and share it all with friends, family, and the world on YouTube."}, {"type": "video_result", "url": "https://www.britannica.com/video/172743/overview-Barack-Obama", "title": "President of the United States of America Barack Obama | Britannica", "description": "[NARRATOR] Barack Obama was elected the 44th president of the United States in 2008, becoming the first African American to hold the office. Obama vowed to bring change to the political system."}, {"type": "video_result", "url": "https://www.youtube.com/watch?v=rvr2g8-5dcE", "title": "The 44th President: In His Own Words - Toughest Day | Special ...", "description": "President Obama reflects on his toughest day in the Presidency and seeing Secret Service cry for the first time. Watch the premiere of The 44th President: In..."}]]}',
+                    content='"Barack Obama"',
+                ),
+            ],
+            stream=True,
+        )
+        iterator = self.api.chat_completion(request)
+
+        events = []
+        async for chunk in iterator:
+            events.append(chunk.event)
+
+        response = ""
+        for e in events[1:-1]:
+            response += e.delta
+
+        self.assertTrue("obama" in response.lower())
@@ -9,6 +9,7 @@ from llama_models.llama3_1.api.datatypes import (
     SamplingParams,
     SamplingStrategy,
     SystemMessage,
+    ToolResponseMessage,
 )
 from llama_toolchain.inference.api.datatypes import (
     ChatCompletionResponseEventType,
@@ -21,14 +22,10 @@ from llama_toolchain.inference.ollama.ollama import get_provider_impl
 class OllamaInferenceTests(unittest.IsolatedAsyncioTestCase):
 
     async def asyncSetUp(self):
         self.valid_supported_model = "Meta-Llama3.1-8B-Instruct"
-        ollama_config = OllamaImplConfig(
-            model="llama3.1:8b-instruct-fp16",
-            url="http://localhost:11434",
-        )
+        ollama_config = OllamaImplConfig(url="http://localhost:11434")
 
         # setup ollama
-        self.api = await get_provider_impl(ollama_config)
+        self.api = await get_provider_impl(ollama_config, {})
         await self.api.initialize()
 
         current_date = datetime.now()
@@ -245,7 +242,6 @@ class OllamaInferenceTests(unittest.IsolatedAsyncioTestCase):
         iterator = self.api.chat_completion(request)
         events = []
         async for chunk in iterator:
-            # print(f"{chunk.event.event_type:<40} | {str(chunk.event.stop_reason):<26} | {chunk.event.delta} ")
             events.append(chunk.event)
 
         self.assertEqual(events[0].event_type, ChatCompletionResponseEventType.start)
@@ -253,6 +249,12 @@ class OllamaInferenceTests(unittest.IsolatedAsyncioTestCase):
         self.assertEqual(
             events[-1].event_type, ChatCompletionResponseEventType.complete
         )
+        # last but one event should be eom with tool call
+        self.assertEqual(
+            events[-2].event_type, ChatCompletionResponseEventType.progress
+        )
+        self.assertEqual(events[-2].stop_reason, StopReason.end_of_message)
+        self.assertEqual(events[-2].delta.content.tool_name, BuiltinTool.brave_search)
 
     async def test_custom_tool_call_streaming(self):
         request = ChatCompletionRequest(
@@ -317,3 +319,62 @@ class OllamaInferenceTests(unittest.IsolatedAsyncioTestCase):
                 "top_p": 0.99,
             },
         )
+
+    async def test_multi_turn(self):
+        request = ChatCompletionRequest(
+            model=self.valid_supported_model,
+            messages=[
+                self.system_prompt,
+                UserMessage(
+                    content="Search the web and tell me who the "
+                    "44th president of the United States was",
+                ),
+                ToolResponseMessage(
+                    call_id="1",
+                    tool_name=BuiltinTool.brave_search,
+                    content='{"query": "44th president of the United States", "top_k": [{"title": "Barack Obama | The White House", "url": "https://www.whitehouse.gov/about-the-white-house/presidents/barack-obama/", "description": "<strong>Barack Obama</strong> served as the 44th President of the United States. His story is the American story \\u2014 values from the heartland, a middle-class upbringing in a strong family, hard work and education as the means of getting ahead, and the conviction that a life so blessed should be lived in service ...", "type": "search_result"}, {"title": "Barack Obama \\u2013 The White House", "url": "https://trumpwhitehouse.archives.gov/about-the-white-house/presidents/barack-obama/", "description": "After working his way through college with the help of scholarships and student loans, <strong>President Obama</strong> moved to Chicago, where he worked with a group of churches to help rebuild communities devastated by the closure of local steel plants.", "type": "search_result"}, [{"type": "video_result", "url": "https://www.instagram.com/reel/CzMZbJmObn9/", "title": "Fifteen years ago, on Nov. 4, Barack Obama was elected as ...", "description": ""}, {"type": "video_result", "url": "https://video.alexanderstreet.com/watch/the-44th-president-barack-obama?context=channel:barack-obama", "title": "The 44th President (Barack Obama) - Alexander Street, a ...", "description": "You need to enable JavaScript to run this app"}, {"type": "video_result", "url": "https://www.youtube.com/watch?v=iyL7_2-em5k", "title": "Barack Obama for Kids | Learn about the life and contributions ...", "description": "Enjoy the videos and music you love, upload original content, and share it all with friends, family, and the world on YouTube."}, {"type": "video_result", "url": "https://www.britannica.com/video/172743/overview-Barack-Obama", "title": "President of the United States of America Barack Obama | Britannica", "description": "[NARRATOR] Barack Obama was elected the 44th president of the United States in 2008, becoming the first African American to hold the office. Obama vowed to bring change to the political system."}, {"type": "video_result", "url": "https://www.youtube.com/watch?v=rvr2g8-5dcE", "title": "The 44th President: In His Own Words - Toughest Day | Special ...", "description": "President Obama reflects on his toughest day in the Presidency and seeing Secret Service cry for the first time. Watch the premiere of The 44th President: In..."}]]}',
+                ),
+            ],
+            stream=True,
+        )
+        iterator = self.api.chat_completion(request)
+
+        events = []
+        async for chunk in iterator:
+            events.append(chunk.event)
+
+        response = ""
+        for e in events[1:-1]:
+            response += e.delta
+
+        self.assertTrue("obama" in response.lower())
+
+    async def test_tool_call_code_streaming(self):
+        request = ChatCompletionRequest(
+            model=self.valid_supported_model,
+            messages=[
+                self.system_prompt,
+                UserMessage(
+                    content="Write code to answer this question: What is the 100th prime number?",
+                ),
+            ],
+            stream=True,
+        )
+        iterator = self.api.chat_completion(request)
+        events = []
+        async for chunk in iterator:
+            events.append(chunk.event)
+
+        self.assertEqual(events[0].event_type, ChatCompletionResponseEventType.start)
+        # last event is of type "complete"
+        self.assertEqual(
+            events[-1].event_type, ChatCompletionResponseEventType.complete
+        )
+        # last but one event should be eom with tool call
+        self.assertEqual(
+            events[-2].event_type, ChatCompletionResponseEventType.progress
+        )
+        self.assertEqual(events[-2].stop_reason, StopReason.end_of_message)
+        self.assertEqual(
+            events[-2].delta.content.tool_name, BuiltinTool.code_interpreter
+        )