# Mirror of https://github.com/meta-llama/llama-stack.git
services:
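  # Ollama inference server. SERVICE_NAME and OLLAMA_PORT default to
  # "ollama" and 11434 and can be overridden from the environment.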
  ${SERVICE_NAME:-ollama}:
    image: ollama/ollama:latest
    ports:
      - "${OLLAMA_PORT:-11434}:${OLLAMA_PORT:-11434}"
    volumes:
      # Persist pulled models on the host across container restarts.
      - $HOME/.ollama:/root/.ollama
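    # Expose all GPUs to the container (CDI syntax); requires the NVIDIA
    # Container Toolkit on the host.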
    devices:
      - nvidia.com/gpu=all
    runtime: nvidia
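    # Mark the service healthy once the API responds; the init container
    # below waits on this before pulling models.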
    healthcheck:
      test: ["CMD", "curl", "-f", "http://ollama:11434"]
      interval: 10s
      timeout: 5s
      retries: 5

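  # One-shot init container: waits for the server to come up, then pulls
  # every model listed in OLLAMA_MODELS.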
  ${SERVICE_NAME:-ollama}-init:
    image: ollama/ollama
    depends_on:
      # depends_on must use the mapping form to attach a condition; a plain
      # list entry cannot express `condition: service_healthy`.
      ${SERVICE_NAME:-ollama}:
        condition: service_healthy
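    # OLLAMA_HOST points the ollama CLI at the server container by service
    # name; OLLAMA_MODELS is a space-separated list of models to pull and
    # has no default.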
    environment:
      - OLLAMA_HOST=ollama
      - OLLAMA_MODELS=${OLLAMA_MODELS}
    volumes:
      - $HOME/.ollama:/root/.ollama
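    # The folded scalar (>) collapses the block below into a single string
    # for `sh -c`. Shell variables are written with $$ so Compose
    # interpolation leaves them for the shell; ${OLLAMA_MODELS} is
    # intentionally expanded by Compose.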
    entrypoint: >
      sh -c '
      max_attempts=30;
      attempt=0;

      echo "Waiting for Ollama server...";
      until curl -s http://ollama:11434 > /dev/null; do
        attempt=$$((attempt + 1));
        if [ $$attempt -ge $$max_attempts ]; then
          echo "Timeout waiting for Ollama server";
          exit 1;
        fi;
        echo "Attempt $$attempt: Server not ready yet...";
        sleep 5;
      done;

      echo "Server ready. Pulling models...";

      models="${OLLAMA_MODELS}";
      for model in $$models; do
        echo "Pulling $$model...";
        if ! ollama pull "$$model"; then
          echo "Failed to pull $$model";
          exit 1;
        fi;
      done;

      echo "All models pulled successfully"
      '
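
# Example usage, assuming this file is saved as compose.yaml (the model
# tag is illustrative):
#   OLLAMA_MODELS="llama3.2:3b" docker compose up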