services:
  ollama:
    image: ollama/ollama:latest
    network_mode: ${NETWORK_MODE:-bridge}
    volumes:
      - ~/.ollama:/root/.ollama
    ports:
      - "11434:11434"
    environment:
      OLLAMA_DEBUG: 1
    command: []
    deploy:
      resources:
        limits:
          memory: 8G # Set maximum memory
        reservations:
          memory: 8G # Set minimum memory reservation
    # healthcheck:
    #   # ugh, no CURL in ollama image
    #   test: ["CMD", "curl", "-f", "http://ollama:11434"]
    #   interval: 10s
    #   timeout: 5s
    #   retries: 5

  ollama-init:
    image: ollama/ollama:latest
    depends_on:
      - ollama
      # condition: service_healthy
    network_mode: ${NETWORK_MODE:-bridge}
    environment:
      - OLLAMA_HOST=ollama
      - INFERENCE_MODEL=${INFERENCE_MODEL}
      - SAFETY_MODEL=${SAFETY_MODEL:-}
    volumes:
      - ~/.ollama:/root/.ollama
      - ./pull-models.sh:/pull-models.sh
    entrypoint: ["/pull-models.sh"]

  llamastack:
    depends_on:
      ollama:
        condition: service_started
      ollama-init:
        condition: service_started
    image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama}
    network_mode: ${NETWORK_MODE:-bridge}
    volumes:
      - ~/.llama:/root/.llama
      # Link to ollama run.yaml file
      - ~/local/llama-stack/:/app/llama-stack-source
      - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/my-run.yaml
    ports:
      - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}"
    environment:
      - INFERENCE_MODEL=${INFERENCE_MODEL}
      - SAFETY_MODEL=${SAFETY_MODEL:-}
      - OLLAMA_URL=http://ollama:11434
    entrypoint: >
      python -m llama_stack.distribution.server.server /root/my-run.yaml
      --port ${LLAMA_STACK_PORT:-5001}
    deploy:
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 3
        window: 60s

volumes:
  ollama:
  ollama-init:
  llamastack:
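
# ---------------------------------------------------------------------------
# Example configuration (sketch): this compose file reads several variables,
# typically from an .env file alongside it. The model tag below is only a
# placeholder assumption; substitute whichever model Ollama should serve.
#
#   # .env
#   INFERENCE_MODEL=llama3.2:3b-instruct-fp16   # placeholder model tag
#   SAFETY_MODEL=                               # optional; when set, run-with-safety.yaml is mounted
#   LLAMA_STACK_PORT=5001
#   NETWORK_MODE=bridge
#
# Then bring the stack up with:
#
#   docker compose up
#
# On the commented-out healthcheck above: the ollama image ships no curl, but
# the ollama CLI is present (it is the image's entrypoint), so one possible
# alternative, untested here, is:
#
#   healthcheck:
#     test: ["CMD", "ollama", "ls"]
#     interval: 10s
#     timeout: 5s
#     retries: 5
# ---------------------------------------------------------------------------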