diff --git a/llama_stack/distribution/docker/ollama/compose.yaml b/llama_stack/distribution/docker/ollama/compose.yaml
new file mode 100644
index 000000000..51ae103a3
--- /dev/null
+++ b/llama_stack/distribution/docker/ollama/compose.yaml
@@ -0,0 +1,46 @@
+services:
+  ollama:
+    image: ollama/ollama:latest
+    network_mode: "host"
+    volumes:
+      - ollama:/root/.ollama  # persist model data in the named docker volume so models load quickly on restart
+    ports:
+      - "11434:11434"
+    devices:
+      - nvidia.com/gpu=all
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+    command: []
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              # that's the closest analogue to --gpus; provide
+              # an integer amount of devices or 'all'
+              count: 1
+              # Devices are reserved using a list of capabilities, making
+              # capabilities the only required field. A device MUST
+              # satisfy all the requested capabilities for a successful
+              # reservation.
+              capabilities: [gpu]
+    runtime: nvidia
+  llamastack-local-cpu:
+    depends_on: ollama
+    image: llamastack/llamastack-local-cpu
+    network_mode: "host"
+    volumes:
+      - ~/.llama:/root/.llama
+      # Link to ollama run.yaml file
+      - ./ollama-run.yaml:/root/llamastack-run-ollama.yaml
+    ports:
+      - "5000:5000"
+    # Hack: wait for the ollama server to start before starting the llama stack server
+    entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-ollama.yaml"
+    restart_policy:
+      condition: on-failure
+      delay: 3s
+      max_attempts: 5
+      window: 60s
+volumes:
+  ollama:
diff --git a/llama_stack/distribution/docker/ollama/ollama-run.yaml b/llama_stack/distribution/docker/ollama/ollama-run.yaml
new file mode 100644
index 000000000..798dabc0b
--- /dev/null
+++ b/llama_stack/distribution/docker/ollama/ollama-run.yaml
@@ -0,0 +1,46 @@
+version: '2'
+built_at: '2024-10-08T17:40:45.325529'
+image_name: local
+docker_image: null
+conda_env: local
+apis:
+- shields
+- agents
+- models
+- memory
+- memory_banks
+- inference
+- safety
+providers:
+  inference:
+  - provider_id: ollama0
+    provider_type: remote::ollama
+    config:
+      url: http://127.0.0.1:11434
+  safety:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config:
+      llama_guard_shield:
+        model: Llama-Guard-3-1B
+        excluded_categories: []
+        disable_input_check: false
+        disable_output_check: false
+      prompt_guard_shield:
+        model: Prompt-Guard-86M
+  memory:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config: {}
+  agents:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config:
+      persistence_store:
+        namespace: null
+        type: sqlite
+        db_path: ~/.llama/runtime/kvstore.db
+  telemetry:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config: {}
diff --git a/llama_stack/distribution/docker/tgi/compose.yaml b/llama_stack/distribution/docker/tgi/compose.yaml
index d5bcd50f3..b525afc33 100644
--- a/llama_stack/distribution/docker/tgi/compose.yaml
+++ b/llama_stack/distribution/docker/tgi/compose.yaml
@@ -38,7 +38,7 @@ services:
     depends_on:
       text-generation-inference:
         condition: service_healthy
-    image: llamastack-local-cpu
+    image: llamastack/llamastack-local-cpu
     network_mode: "host"
     volumes:
       - ~/.llama:/root/.llama
diff --git a/llama_stack/distribution/templates/run_configs/local-tgi-chroma-run.yaml b/llama_stack/distribution/templates/run_configs/local-tgi-chroma-run.yaml
new file mode 100644
index 000000000..e86ea2722
--- /dev/null
+++ b/llama_stack/distribution/templates/run_configs/local-tgi-chroma-run.yaml
@@ -0,0 +1,48 @@
+version: '2'
+built_at: '2024-10-08T17:40:45.325529'
+image_name: local
+docker_image: null
+conda_env: local
+apis:
+- shields
+- agents
+- models
+- memory
+- memory_banks
+- inference
+- safety
+providers:
+  inference:
+  - provider_id: tgi0
+    provider_type: remote::tgi
+    config:
+      url: http://127.0.0.1:5009
+  safety:
+  - provider_id: meta-reference
+    provider_type: meta-reference
+    config:
+      llama_guard_shield:
+        model: Llama-Guard-3-1B
+        excluded_categories: []
+        disable_input_check: false
+        disable_output_check: false
+      prompt_guard_shield:
+        model: Prompt-Guard-86M
+  memory:
+  - provider_id: chroma0
+    provider_type: remote::chromadb
+    config:
+      host: localhost
+      port: 6000
+  agents:
+  - provider_id: meta-reference
+    provider_type: meta-reference
+    config:
+      persistence_store:
+        namespace: null
+        type: sqlite
+        db_path: ~/.llama/runtime/kvstore.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: meta-reference
+    config: {}
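
Rough usage sketch for the new ollama distribution (not part of the patch; assumes docker compose v2 and that a model supported by the run config is available to ollama):

    cd llama_stack/distribution/docker/ollama
    docker compose up -d
    # optionally pull a model into the ollama volume, e.g.:
    # docker compose exec ollama ollama pull <model>
    # per the compose file, ollama serves on port 11434 and the
    # llama stack server on port 5000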