mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-17 16:29:54 +00:00
Move run-*.yaml to templates/ so they can be packaged
This commit is contained in:
parent
dd732f037f
commit
5dce17668c
23 changed files with 682 additions and 722 deletions
|
|
@@ -1,55 +0,0 @@
# Docker Compose stack: an Ollama inference server plus a one-shot init
# container that waits for the server to become healthy and then pulls
# every model listed in the OLLAMA_MODELS environment variable.
services:
  ${SERVICE_NAME:-ollama}:
    image: ollama/ollama:latest
    ports:
      - ${OLLAMA_PORT:-11434}:${OLLAMA_PORT:-11434}
    volumes:
      # Persist downloaded models on the host so restarts don't re-pull.
      - $HOME/.ollama:/root/.ollama
    devices:
      - nvidia.com/gpu=all
    runtime: nvidia
    healthcheck:
      # Probed over the compose network; Ollama's in-container default port
      # is 11434. NOTE(review): this stays 11434 even if OLLAMA_PORT changes,
      # which is correct as long as only the host-side mapping varies —
      # confirm against how OLLAMA_PORT is used by callers.
      test: ["CMD", "curl", "-f", "http://ollama:11434"]
      interval: 10s
      timeout: 5s
      retries: 5

  ${SERVICE_NAME:-ollama}-init:
    image: ollama/ollama
    depends_on:
      # Fix: the long-form `condition:` syntax requires depends_on to be a
      # mapping keyed by service name. The original used a sequence entry
      # ("- ${SERVICE_NAME:-ollama}:"), which Compose rejects when combined
      # with `condition: service_healthy`.
      ${SERVICE_NAME:-ollama}:
        condition: service_healthy
    environment:
      - OLLAMA_HOST=ollama
      - OLLAMA_MODELS=${OLLAMA_MODELS}
    volumes:
      - $HOME/.ollama:/root/.ollama
    # Folded scalar (>): blank lines below become newlines, other line breaks
    # fold to spaces; the trailing ';' on each statement keeps the folded
    # one-liners valid sh.
    # NOTE(review): shell variables here ($attempt, $model, $max_attempts)
    # would normally need '$$' to escape Compose interpolation — reproduced
    # as-is from the source; confirm against the original file.
    entrypoint: >
      sh -c '
      max_attempts=30;
      attempt=0;

      echo "Waiting for Ollama server...";
      until curl -s http://ollama:11434 > /dev/null; do
        attempt=$((attempt + 1));
        if [ $attempt -ge $max_attempts ]; then
          echo "Timeout waiting for Ollama server";
          exit 1;
        fi;
        echo "Attempt $attempt: Server not ready yet...";
        sleep 5;
      done;

      echo "Server ready. Pulling models...";

      models="${OLLAMA_MODELS}";
      for model in $models; do
        echo "Pulling $model...";
        if ! ollama pull "$model"; then
          echo "Failed to pull $model";
          exit 1;
        fi;
      done;

      echo "All models pulled successfully"
      '
Loading…
Add table
Add a link
Reference in a new issue