Add ollama/pull-models.sh

2025-12-17 07:42:36 +00:00 · 2024-11-18 10:57:20 -08:00 · 2024-11-18 10:57:20 -08:00 · 1ecaf2cb3c
commit 1ecaf2cb3c
parent fa1d29cfdc
16 changed files with 305 additions and 289 deletions
--- a/distributions/ollama/compose.yaml
+++ b/distributions/ollama/compose.yaml
@@ -1,30 +1,71 @@
 services:
  ollama:
    image: ollama/ollama:latest
-    network_mode: "host"
+    network_mode: ${NETWORK_MODE:-bridge}  # overridable via NETWORK_MODE; defaults to bridge
    volumes:
-      - ollama:/root/.ollama # this solution synchronizes with the docker volume and loads the model rocket fast
+      - ~/.ollama:/root/.ollama  # bind-mount host ~/.ollama (the same path ollama-init mounts)
    ports:
      - "11434:11434"
+    environment:
+      OLLAMA_DEBUG: "1"  # quoted so the environment value stays a string
    command: []
+    deploy:
+      resources:
+        limits:
+          memory: 8G  # upper bound on container memory
+        reservations:
+          memory: 8G  # reserved memory; set equal to the limit
+    # healthcheck:
+    #   # Disabled: the ollama image does not ship curl, so this probe cannot run.
+    #   test: ["CMD", "curl", "-f", "http://ollama:11434"]
+    #   interval: 10s
+    #   timeout: 5s
+    #   retries: 5
+
+  ollama-init:  # helper service whose entrypoint is the mounted /pull-models.sh
+    image: ollama/ollama:latest
+    depends_on:
+      - ollama
+        # TODO: switch to the mapping form with "condition: service_healthy" once ollama has a working healthcheck
+    network_mode: ${NETWORK_MODE:-bridge}
+    environment:
+      - OLLAMA_HOST=ollama  # NOTE(review): whether "ollama" resolves depends on NETWORK_MODE — confirm
+      - INFERENCE_MODEL=${INFERENCE_MODEL}  # no default given here; taken from the caller's environment
+      - SAFETY_MODEL=${SAFETY_MODEL:-}  # empty string when SAFETY_MODEL is unset
+    volumes:
+      - ~/.ollama:/root/.ollama  # same host directory the ollama service mounts
+      - ./pull-models.sh:/pull-models.sh  # presumably must be executable on the host — TODO confirm
+    entrypoint: ["/pull-models.sh"]
+
  llamastack:
    depends_on:
-    - ollama
-    image: llamastack/distribution-ollama
-    network_mode: "host"
+      ollama:
+        condition: service_started
+      ollama-init:
+        condition: service_started  # NOTE(review): waits for start only, not for pull-models.sh to finish
+    image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama}  # overridable via LLAMA_STACK_IMAGE
+    network_mode: ${NETWORK_MODE:-bridge}
    volumes:
      - ~/.llama:/root/.llama
      # Link to ollama run.yaml file
-      - ./run.yaml:/root/my-run.yaml
+      - ~/local/llama-stack/:/app/llama-stack-source  # NOTE(review): host-specific source path — confirm
+      - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/my-run.yaml  # run-with-safety.yaml when SAFETY_MODEL is non-empty, else run.yaml
    ports:
-      - "5000:5000"
-    # Hack: wait for ollama server to start before starting docker
-    entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml"
+      - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}"
+    environment:
+      - INFERENCE_MODEL=${INFERENCE_MODEL}
+      - SAFETY_MODEL=${SAFETY_MODEL:-}
+      - OLLAMA_URL=http://ollama:11434
+    entrypoint: >-  # folded scalar: lines are joined with spaces, so no shell-style "\" continuation
+        python -m llama_stack.distribution.server.server /root/my-run.yaml
+        --port ${LLAMA_STACK_PORT:-5001}
    deploy:
      restart_policy:
        condition: on-failure
-        delay: 3s
-        max_attempts: 5
+        delay: 10s  # pause between restart attempts
+        max_attempts: 3  # stop retrying after three failed attempts
        window: 60s
 volumes:
  ollama:
+  ollama-init:  # NOTE(review): no service shown here mounts this named volume — confirm it is needed
+  llamastack:  # NOTE(review): likewise not referenced by the services shown here — confirm