docker compose commands

2025-10-15 14:43:48 +00:00 · 2024-11-08 14:00:54 -08:00 · 2024-11-08 14:00:54 -08:00 · 8cd7e406c0
commit 8cd7e406c0
parent bd0622ef10
9 changed files with 129 additions and 0 deletions
--- a/docker/chromadb/compose.yaml
+++ b/docker/chromadb/compose.yaml
@ -0,0 +1,11 @@
+services:
+  chromadb:  # Chroma vector database with persistence enabled
+    image: chromadb/chroma:latest
+    container_name: chromadb
+    # No `ports` (was 6000:6000): Compose rejects mappings with host networking.
+    # NOTE(review): confirm the image really listens on 6000 on the host.
+    network_mode: "host"
+    volumes:
+      - ./chroma_vdb:/chroma/chroma
+    environment:
+      - IS_PERSISTENT=TRUE
--- a/docker/llamastack/compose.yaml
+++ b/docker/llamastack/compose.yaml
@ -0,0 +1,20 @@
+services:
+  llamastack:  # Llama Stack server, configured by the bind-mounted run.yaml
+    image: llamastack/distribution-ollama
+    # NOTE(review): `ollama` is not defined in this file; confirm it is merged in.
+    depends_on:
+      - ollama
+    # No `ports` (was 5000:5000): Compose rejects mappings with host networking.
+    network_mode: "host"
+    volumes:
+      - ~/.llama:/root/.llama
+      # Link to ollama run.yaml file
+      - ./run.yaml:/root/my-run.yaml
+    # Hack: sleep so the ollama server has time to start before this one does
+    entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml"
+    deploy:
+      restart_policy:
+        condition: on-failure
+        delay: 3s
+        max_attempts: 5
+        window: 60s
--- a/docker/ollama/compose.yaml
+++ b/docker/ollama/compose.yaml
@ -0,0 +1,11 @@
+services:
+  ollama:  # Ollama model server
+    image: ollama/ollama:latest
+    # No `ports` (was 11434:11434): Compose rejects mappings with host networking.
+    network_mode: "host"
+    volumes:
+      # Named volume keeps /root/.ollama between runs so models load fast
+      - ollama:/root/.ollama
+    # `command: []` was dropped: an empty list blanks the image's default command.
+volumes:
+  ollama:
--- a/docker/pgvector/compose.yaml
+++ b/docker/pgvector/compose.yaml
@ -0,0 +1,18 @@
+services:
+  postgres:  # PostgreSQL image built locally from postgres/postgres.Dockerfile
+    build:
+      context: ./postgres
+      dockerfile: postgres.Dockerfile
+    environment:
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+      POSTGRES_DB: vectorexample
+    ports:
+      - "5432:5432"
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+      # SQL init script that creates the vector extension
+      - ./postgres/vector_extension.sql:/docker-entrypoint-initdb.d/0-vector_extension.sql
+      # - ./postgres/0-vector-extension.sh:/docker-entrypoint-initdb.d/0-vector-extension.sh
+volumes:
+  postgres_data:
--- a/docker/pgvector/postgres/0-vector-extension.sh
+++ b/docker/pgvector/postgres/0-vector-extension.sh
@ -0,0 +1,14 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+set -e
+# Creates the vector extension in $POSTGRES_DB, connecting as $POSTGRES_USER.
+echo "In create extension"
+# IF NOT EXISTS keeps this idempotent, matching vector_extension.sql.
+psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname="$POSTGRES_DB" <<EOFSQL
+CREATE EXTENSION IF NOT EXISTS vector;
+EOFSQL
--- a/docker/pgvector/postgres/init.sql
+++ b/docker/pgvector/postgres/init.sql
@ -0,0 +1,2 @@
+-- Create the 'store' database. NOTE(review): no compose file shown here mounts this script; confirm it is used.
+CREATE DATABASE store;
--- a/docker/pgvector/postgres/postgres.Dockerfile
+++ b/docker/pgvector/postgres/postgres.Dockerfile
@ -0,0 +1,15 @@
+# Extends the official PostgreSQL image with the pgvector extension, compiled
+# from source against the PostgreSQL server development headers.
+FROM postgres:latest
+
+# Toolchain, git and PostgreSQL headers needed to compile pgvector
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    git \
+    postgresql-server-dev-all \
+    && rm -rf /var/lib/apt/lists/*
+
+# Shallow-clone, build and install in one layer, then drop the source tree
+RUN git clone --depth 1 https://github.com/pgvector/pgvector.git /tmp/pgvector \
+    && make -C /tmp/pgvector && make -C /tmp/pgvector install \
+    && rm -rf /tmp/pgvector
--- a/docker/pgvector/postgres/vector_extension.sql
+++ b/docker/pgvector/postgres/vector_extension.sql
@ -0,0 +1,2 @@
+-- Create the 'vector' extension (if not already present) in the database set by POSTGRES_DB in compose.yaml
+CREATE EXTENSION IF NOT EXISTS vector;
--- a/docker/tgi/compose.yaml
+++ b/docker/tgi/compose.yaml
@ -0,0 +1,36 @@
+services:
+  text-generation-inference:  # TGI serving Llama-3.1-8B-Instruct on port 5009
+    image: ghcr.io/huggingface/text-generation-inference:latest
+    # No `ports` (was 5009:5009): Compose rejects mappings with host networking.
+    network_mode: "host"
+    volumes:
+      # Host Hugging Face cache directory, mounted in the container as /data
+      - $HOME/.cache/huggingface:/data
+    # GPU access is requested via this device entry, `deploy` and `runtime`.
+    devices:
+      - nvidia.com/gpu=all
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+      - HF_HOME=/data
+      - HF_DATASETS_CACHE=/data
+      - HF_MODULES_CACHE=/data
+      - HF_HUB_CACHE=/data
+    command: ["--dtype", "bfloat16", "--usage-stats", "on", "--sharded", "false", "--model-id", "meta-llama/Llama-3.1-8B-Instruct", "--port", "5009", "--cuda-memory-fraction", "0.3"]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              # Closest analogue to --gpus: an integer device count or 'all'
+              count: 1
+              # Capabilities are the only required field; a device must
+              # satisfy all of them for the reservation to succeed.
+              capabilities: [gpu]
+    runtime: nvidia
+    healthcheck:
+      # Probe via localhost: with host networking the Compose service name
+      # is not resolvable from inside the container.
+      test: ["CMD", "curl", "-f", "http://localhost:5009/health"]
+      interval: 5s
+      timeout: 5s
+      retries: 30