remove inline-nvidia templates

This commit is contained in:
Xi Yan 2025-01-15 14:15:56 -08:00
parent b3202bcf77
commit 27e07b44b5
3 changed files with 0 additions and 159 deletions

View file

@ -1 +0,0 @@
../../llama_stack/templates/nvidia/build.yaml

View file

@ -1,58 +0,0 @@
services:
nim:
image: ${DOCKER_IMAGE:-nvcr.io/nim/meta/llama-3.1-8b-instruct:latest}
network_mode: "host"
volumes:
- nim-llm-cache:/opt/nim/.cache
ports:
- "8000:8000"
shm_size: 16G
environment:
- CUDA_VISIBLE_DEVICES=0
- NIM_HTTP_API_PORT=8000
- NIM_TRITON_LOG_VERBOSE=1
- NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
command: []
deploy:
resources:
reservations:
devices:
- driver: nvidia
# that's the closest analogue to --gpus; provide
# an integer amount of devices or 'all'
count: 1
# Devices are reserved using a list of capabilities, making
# capabilities the only required field. A device MUST
# satisfy all the requested capabilities for a successful
# reservation.
capabilities: [gpu]
runtime: nvidia
healthcheck:
test: ["CMD", "curl", "http://localhost:8000/v1/health/ready"]
interval: 5s
timeout: 5s
retries: 30
start_period: 120s
llamastack:
depends_on:
- nim
image: distribution-nvidia:dev
network_mode: "host"
volumes:
- ~/.llama:/root/.llama
- ./run.yaml:/root/llamastack-run-nvidia.yaml
ports:
- "5000:5000"
environment:
- INFERENCE_MODEL=${INFERENCE_MODEL:-Llama3.1-8B-Instruct}
- NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml-config /root/llamastack-run-nvidia.yaml"
deploy:
restart_policy:
condition: on-failure
delay: 3s
max_attempts: 5
window: 60s
volumes:
nim-llm-cache:
driver: local

View file

@ -1,100 +0,0 @@
version: '2'
image_name: nvidia
conda_env: nvidia
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: nvidia
provider_type: remote::nvidia
config:
url: http://localhost:8000
api_key: ${env.NVIDIA_API_KEY} # TODO: don't need api key, code adjustments needed
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: nvidia
model_type: llm
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups: []