# llama-stack/distributions/ollama/gpu/compose.yaml

services:
ollama:
image: ollama/ollama:latest
network_mode: "host"
volumes:
      - ollama:/root/.ollama # keep models in a named docker volume so they persist across restarts and load quickly
    ports:
      - "11434:11434" # documentation only: port mappings are ignored under network_mode: "host"
    devices:
      - nvidia.com/gpu=all # CDI device request; requires the NVIDIA Container Toolkit
    environment:
      - CUDA_VISIBLE_DEVICES=0 # expose only the first GPU to ollama
command: []
deploy:
resources:
reservations:
devices:
- driver: nvidia
              # count is the closest analogue to --gpus: provide an
              # integer number of devices or 'all'
              count: 1
# Devices are reserved using a list of capabilities, making
# capabilities the only required field. A device MUST
# satisfy all the requested capabilities for a successful
# reservation.
capabilities: [gpu]
runtime: nvidia
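    # This service layers three GPU mechanisms: the CDI device entry, the
    # deploy reservation, and the nvidia runtime. A minimal alternative
    # sketch, assuming a Compose version recent enough to support the
    # service-level `gpus` attribute (not in the original file):
    # gpus: all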
llamastack-local-cpu:
depends_on:
- ollama
image: llamastack/llamastack-local-cpu
network_mode: "host"
volumes:
- ~/.llama:/root/.llama
      # mount the ollama run.yaml config into the container
      - ./run.yaml:/root/llamastack-run-ollama.yaml
ports:
- "5000:5000"
    # Hack: sleep to give the ollama server time to start before launching the llama-stack server
entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-ollama.yaml"
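    # A less brittle alternative to the fixed sleep (a sketch, assuming the
    # ollama image ships a usable curl, which may not hold): add a healthcheck
    # to the ollama service and gate this one on it:
    #
    #   ollama:
    #     healthcheck:
    #       test: ["CMD", "curl", "-f", "http://localhost:11434/"]
    #       interval: 5s
    #       retries: 12
    #
    #   llamastack-local-cpu:
    #     depends_on:
    #       ollama:
    #         condition: service_healthy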
deploy:
restart_policy:
condition: on-failure
delay: 3s
max_attempts: 5
window: 60s
volumes:
ollama:
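
# Typical usage (not part of the original file):
#   docker compose up
# With host networking, ollama listens on port 11434 and the llama-stack
# server on port 5000.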