# What does this PR do?

Automatically generates the following artifacts for the distributions:

- build.yaml
- run.yaml
- run-with-safety.yaml
- parts of the markdown docs

## Test Plan

At this point, this PR only updates the YAMLs and the docs. Some testing (especially with ollama and vllm) has been performed, but much more thorough testing is still needed.
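For a sense of what the generated run configs contain, below is a hypothetical fragment of a run.yaml that wires the inference API to a remote ollama server. Every field name here is an assumption based on typical llama-stack run configs, not output taken from this PR:

```yaml
# Hypothetical run.yaml fragment (illustrative only; the generated file
# will differ). Points the inference API at a remote ollama provider.
version: '2'
image_name: ollama
apis:
- inference
- safety
providers:
  inference:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: http://localhost:11434
```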
The docker compose file for the ollama distribution:
```yaml
services:
  ollama:
    image: ollama/ollama:latest
    network_mode: ${NETWORK_MODE:-bridge}
    volumes:
      - ~/.ollama:/root/.ollama
    ports:
      - "11434:11434"
    environment:
      OLLAMA_DEBUG: 1
    command: []
    deploy:
      resources:
        limits:
          memory: 8G  # Set maximum memory
        reservations:
          memory: 8G  # Set minimum memory reservation
    # healthcheck:
    #   # ugh, no CURL in ollama image
    #   test: ["CMD", "curl", "-f", "http://ollama:11434"]
    #   interval: 10s
    #   timeout: 5s
    #   retries: 5

  ollama-init:
    image: ollama/ollama:latest
    depends_on:
      - ollama
      # condition: service_healthy
    network_mode: ${NETWORK_MODE:-bridge}
    environment:
      - OLLAMA_HOST=ollama
      - INFERENCE_MODEL=${INFERENCE_MODEL}
      - SAFETY_MODEL=${SAFETY_MODEL:-}
    volumes:
      - ~/.ollama:/root/.ollama
      - ./pull-models.sh:/pull-models.sh
    entrypoint: ["/pull-models.sh"]

  llamastack:
    depends_on:
      ollama:
        condition: service_started
      ollama-init:
        condition: service_started
    image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama}
    network_mode: ${NETWORK_MODE:-bridge}
    volumes:
      - ~/.llama:/root/.llama
      # Link to ollama run.yaml file
      - ~/local/llama-stack/:/app/llama-stack-source
      - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/my-run.yaml
    ports:
      - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}"
    environment:
      - INFERENCE_MODEL=${INFERENCE_MODEL}
      - SAFETY_MODEL=${SAFETY_MODEL:-}
      - OLLAMA_URL=http://ollama:11434
    entrypoint: >
      python -m llama_stack.distribution.server.server /root/my-run.yaml
      --port ${LLAMA_STACK_PORT:-5001}
    deploy:
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 3
        window: 60s

volumes:
  ollama:
  ollama-init:
  llamastack:
```
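The `ollama-init` service pre-pulls models into the shared `~/.ollama` volume via the mounted `pull-models.sh` before `llamastack` comes up. The script itself is not shown here; a minimal sketch of what it might do, assuming the `ollama` CLI and the `OLLAMA_HOST` / `INFERENCE_MODEL` / `SAFETY_MODEL` variables passed in above:

```sh
#!/bin/sh
# Hypothetical sketch of pull-models.sh -- the real script ships next to this
# compose file and may differ (e.g., it may map llama-stack model IDs to
# ollama tags). Pulls each configured model through the ollama server,
# which the CLI reaches via the OLLAMA_HOST environment variable.
set -e

for model in "$INFERENCE_MODEL" "$SAFETY_MODEL"; do
  # SAFETY_MODEL defaults to empty, so skip blanks.
  if [ -n "$model" ]; then
    echo "Pulling $model via $OLLAMA_HOST..."
    ollama pull "$model"
  fi
done
```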
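To bring the stack up, set the model variables and run docker compose. Note that `${SAFETY_MODEL:+-with-safety}` in the `llamastack` volume mount selects `run-with-safety.yaml` automatically whenever `SAFETY_MODEL` is set, and falls back to `run.yaml` otherwise. The model tags below are illustrative assumptions, not mandated by the file:

```sh
# Illustrative invocation; substitute whatever models your distribution expects.
export INFERENCE_MODEL=llama3.2:3b-instruct-fp16
export SAFETY_MODEL=llama-guard3:1b   # omit to run without the safety shield
export LLAMA_STACK_PORT=5001
docker compose up
```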