diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/Dockerfile b/llama_stack/distribution/templates/docker/llamastack-local-cpu/Dockerfile
new file mode 100644
index 000000000..e69de29bb
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml b/llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml
new file mode 100644
index 000000000..9db019454
--- /dev/null
+++ b/llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml
@@ -0,0 +1,15 @@
+name: local-cpu
+distribution_spec:
+  description: remote inference + local safety/agents/memory
+  docker_image: null
+  providers:
+    inference:
+    - remote::ollama
+    - remote::tgi
+    - remote::together
+    - remote::fireworks
+    safety: meta-reference
+    agents: meta-reference
+    memory: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml
new file mode 100644
index 000000000..6a4b2e464
--- /dev/null
+++ b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml
@@ -0,0 +1,64 @@
+built_at: '2024-09-30T09:04:30.533391'
+image_name: local-cpu
+docker_image: local-cpu
+conda_env: null
+apis_to_serve:
+- agents
+- inference
+- models
+- memory
+- safety
+- shields
+- memory_banks
+api_providers:
+  inference:
+    providers:
+    - remote::ollama
+  safety:
+    providers:
+    - meta-reference
+  agents:
+    provider_id: meta-reference
+    config:
+      persistence_store:
+        namespace: null
+        type: sqlite
+        db_path: /home/xiyan/.llama/runtime/kvstore.db
+  memory:
+    providers:
+    - meta-reference
+  telemetry:
+    provider_id: meta-reference
+    config: {}
+routing_table:
+  inference:
+  - provider_id: remote::ollama
+    config:
+      host: localhost
+      port: 6000
+    routing_key: Meta-Llama3.1-8B-Instruct
+  safety:
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: llama_guard
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: code_scanner_guard
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: injection_shield
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: jailbreak_shield
+  memory:
+  - provider_id: meta-reference
+    config: {}
+    routing_key: vector
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/Dockerfile b/llama_stack/distribution/templates/docker/llamastack-local-gpu/Dockerfile
new file mode 100644
index 000000000..00d2fa140
--- /dev/null
+++ b/llama_stack/distribution/templates/docker/llamastack-local-gpu/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.10-slim
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y iputils-ping net-tools iproute2 dnsutils telnet curl wget telnet procps psmisc lsof traceroute bubblewrap && rm -rf /var/lib/apt/lists/*
+
+RUN pip install /app/llama-stack-source
+RUN pip uninstall -y llama-models
+RUN pip install /app/llama-models-source
+
+RUN pip install fastapi fire httpx uvicorn accelerate blobfile fairscale fbgemm-gpu==0.8.0 torch torchvision transformers zmq codeshield transformers matplotlib pillow pandas scikit-learn aiosqlite psycopg2-binary redis blobfile chardet pypdf tqdm numpy scikit-learn scipy nltk sentencepiece transformers faiss-cpu
+RUN pip install torch --index-url https://download.pytorch.org/whl/cpu
+RUN pip install torch --index-url https://download.pytorch.org/whl/cpu
+RUN pip install sentence-transformers --no-deps
+
+# This would be good in production but for debugging flexibility lets not add it right now
+# We need a more solid production ready entrypoint.sh anyway
+#
+CMD ["python", "-m", "llama_stack.distribution.server.server", "./llamastack-run.yaml"]
+
+ADD tmp/configs/local-gpu-build.yaml ./llamastack-build.yaml
+ADD tmp/configs/local-gpu-run.yaml ./llamastack-run.yaml
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml
new file mode 100644
index 000000000..2969479dc
--- /dev/null
+++ b/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml
@@ -0,0 +1,67 @@
+built_at: '2024-09-30T09:00:56.693751'
+image_name: local-gpu
+docker_image: local-gpu
+conda_env: null
+apis_to_serve:
+- memory
+- inference
+- agents
+- shields
+- safety
+- models
+- memory_banks
+api_providers:
+  inference:
+    providers:
+    - meta-reference
+  safety:
+    providers:
+    - meta-reference
+  agents:
+    provider_id: meta-reference
+    config:
+      persistence_store:
+        namespace: null
+        type: sqlite
+        db_path: /home/xiyan/.llama/runtime/kvstore.db
+  memory:
+    providers:
+    - meta-reference
+  telemetry:
+    provider_id: meta-reference
+    config: {}
+routing_table:
+  inference:
+  - provider_id: meta-reference
+    config:
+      model: Llama3.1-8B-Instruct
+      quantization: null
+      torch_seed: null
+      max_seq_len: 4096
+      max_batch_size: 1
+    routing_key: Llama3.1-8B-Instruct
+  safety:
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: llama_guard
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: code_scanner_guard
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: injection_shield
+  - provider_id: meta-reference
+    config:
+      llama_guard_shield: null
+      prompt_guard_shield: null
+    routing_key: jailbreak_shield
+  memory:
+  - provider_id: meta-reference
+    config: {}
+    routing_key: vector
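
A minimal usage sketch for the templates above, assuming the run config is consumed the same way the local-gpu Dockerfile's CMD consumes it; invoking the server directly on a host (rather than inside the built image) and the config path used here are assumptions for illustration, not part of the diff:

    # Start the stack server against the GPU run config -- same entrypoint the image's CMD invokes,
    # just pointed at the template's run.yaml instead of the ./llamastack-run.yaml copied into the image
    python -m llama_stack.distribution.server.server \
        llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml

Building the image itself additionally requires the build context to contain tmp/configs/local-gpu-build.yaml, tmp/configs/local-gpu-run.yaml, /app/llama-stack-source, and /app/llama-models-source, since the Dockerfile's ADD and pip install lines reference those paths.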