remove inline-nvidia templates

2025-12-03 09:53:45 +00:00 · 2025-01-15 14:15:56 -08:00 · 2025-01-15 14:15:56 -08:00 · 27e07b44b5
commit 27e07b44b5
parent b3202bcf77
3 changed files with 0 additions and 159 deletions
--- a/distributions/inline-nvidia/build.yaml
+++ b/distributions/inline-nvidia/build.yaml
@@ -1 +0,0 @@
-../../llama_stack/templates/nvidia/build.yaml
--- a/distributions/inline-nvidia/compose.yaml
+++ b/distributions/inline-nvidia/compose.yaml
@@ -1,58 +0,0 @@
-services:
-  nim:
-    image: ${DOCKER_IMAGE:-nvcr.io/nim/meta/llama-3.1-8b-instruct:latest}
-    network_mode: "host"
-    volumes:
-    - nim-llm-cache:/opt/nim/.cache
-    ports:
-      - "8000:8000"
-    shm_size: 16G
-    environment:
-      - CUDA_VISIBLE_DEVICES=0
-      - NIM_HTTP_API_PORT=8000
-      - NIM_TRITON_LOG_VERBOSE=1
-      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
-    command: []
-    deploy:
-      resources:
-        reservations:
-          devices:
-          - driver: nvidia
-            # that's the closest analogue to --gpus; provide
-            # an integer amount of devices or 'all'
-            count: 1
-            # Devices are reserved using a list of capabilities, making
-            # capabilities the only required field. A device MUST
-            # satisfy all the requested capabilities for a successful
-            # reservation.
-            capabilities: [gpu]
-    runtime: nvidia
-    healthcheck:
-      test: ["CMD", "curl", "http://localhost:8000/v1/health/ready"]
-      interval: 5s
-      timeout: 5s
-      retries: 30
-      start_period: 120s
-  llamastack:
-    depends_on:
-    - nim
-    image: distribution-nvidia:dev
-    network_mode: "host"
-    volumes:
-      - ~/.llama:/root/.llama
-      - ./run.yaml:/root/llamastack-run-nvidia.yaml
-    ports:
-      - "5000:5000"
-    environment:
-      - INFERENCE_MODEL=${INFERENCE_MODEL:-Llama3.1-8B-Instruct}
-      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
-    entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml-config /root/llamastack-run-nvidia.yaml"
-    deploy:
-      restart_policy:
-        condition: on-failure
-        delay: 3s
-        max_attempts: 5
-        window: 60s
-volumes:
-  nim-llm-cache:
-    driver: local
--- a/distributions/inline-nvidia/run.yaml
+++ b/distributions/inline-nvidia/run.yaml
@@ -1,100 +0,0 @@
-version: '2'
-image_name: nvidia
-conda_env: nvidia
-apis:
-- agents
-- datasetio
-- eval
-- inference
-- memory
-- safety
-- scoring
-- telemetry
-- tool_runtime
-providers:
-  inference:
-  - provider_id: nvidia
-    provider_type: remote::nvidia
-    config:
-      url: http://localhost:8000
-      api_key: ${env.NVIDIA_API_KEY} # TODO: don't need api key, code adjustments needed
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
-      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db}
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-  datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config: {}
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config: {}
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
-  - provider_id: memory-runtime
-    provider_type: inline::memory-runtime
-    config: {}
-metadata_store:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: nvidia
-  model_type: llm
-shields: []
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
-tool_groups: []
				`@ -1 +0,0 @@`
				`../../llama_stack/templates/nvidia/build.yaml`