remove inline-nvidia templates

2025-01-15 14:15:56 -08:00 · 2025-01-15 14:15:56 -08:00 · 27e07b44b5
commit 27e07b44b5
parent b3202bcf77
3 changed files with 0 additions and 159 deletions
--- a/distributions/inline-nvidia/build.yaml
+++ b/distributions/inline-nvidia/build.yaml
@@ -1 +0,0 @@
 ../../llama_stack/templates/nvidia/build.yaml
--- a/distributions/inline-nvidia/compose.yaml
+++ b/distributions/inline-nvidia/compose.yaml
@@ -1,58 +0,0 @@
 # Compose services for the inline NVIDIA distribution.
 services:
  # NVIDIA NIM container; its HTTP API is configured for port 8000.
  nim:
    image: ${DOCKER_IMAGE:-nvcr.io/nim/meta/llama-3.1-8b-instruct:latest}
    # Host networking already exposes the container's ports on the host, so no
    # `ports` mapping is declared: Compose does not allow port mappings
    # together with `network_mode: host`.
    network_mode: "host"
    volumes:
      - nim-llm-cache:/opt/nim/.cache
    shm_size: 16G
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      # Resolution order: NIM_NGC_API_KEY, then NGC_API_KEY, then the literal
      # placeholder "ngcapikey".
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
    command: []
    deploy:
      resources:
        reservations:
          devices:
          - driver: nvidia
            # that's the closest analogue to --gpus; provide
            # an integer amount of devices or 'all'
            count: 1
            # Devices are reserved using a list of capabilities, making
            # capabilities the only required field. A device MUST
            # satisfy all the requested capabilities for a successful
            # reservation.
            capabilities: [gpu]
    runtime: nvidia
    healthcheck:
      # -f makes curl exit non-zero on HTTP error statuses; without it any
      # HTTP response (including an error) would mark the container healthy.
      test: ["CMD", "curl", "-f", "http://localhost:8000/v1/health/ready"]
      interval: 5s
      timeout: 5s
      retries: 30
      start_period: 120s
  # Llama Stack server container, configured from the run.yaml mounted below.
  llamastack:
    depends_on:
      # Wait until the nim healthcheck passes; the short list form only orders
      # container start and does not wait for readiness.
      nim:
        condition: service_healthy
    image: distribution-nvidia:dev
    # Host networking already exposes the server's port on the host, so no
    # `ports` mapping is declared (Compose does not allow both together).
    # NOTE(review): the removed mapping was "5000:5000", so the server
    # presumably listens on 5000 - confirm.
    network_mode: "host"
    volumes:
      - ~/.llama:/root/.llama
      - ./run.yaml:/root/llamastack-run-nvidia.yaml
    environment:
      - INFERENCE_MODEL=${INFERENCE_MODEL:-Llama3.1-8B-Instruct}
      # Defaults to an empty string when NVIDIA_API_KEY is unset on the host.
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
    entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml-config /root/llamastack-run-nvidia.yaml"
    deploy:
      restart_policy:
        condition: on-failure
        delay: 3s
        max_attempts: 5
        window: 60s
 # Named volume that the nim service mounts at /opt/nim/.cache.
 volumes:
  nim-llm-cache:
    driver: local
--- a/distributions/inline-nvidia/run.yaml
+++ b/distributions/inline-nvidia/run.yaml
@@ -1,100 +0,0 @@
 # Run configuration header: config version plus the image and conda
 # environment names, both set to `nvidia`.
 version: '2'
 image_name: nvidia
 conda_env: nvidia
 # APIs listed for this distribution; each name has a matching entry under
 # `providers`.
 apis:
 - agents
 - datasetio
 - eval
 - inference
 - memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
 # Provider instances, grouped by API name.
 providers:
  inference:
  # Remote NVIDIA provider pointed at the endpoint on localhost:8000.
  - provider_id: nvidia
    provider_type: remote::nvidia
    config:
      url: http://localhost:8000
      # Empty default (same `${env.X:}` form as the other optional API keys in
      # this file) so an unset or empty NVIDIA_API_KEY is not a missing value.
      # NOTE(review): assumes the provider accepts an empty key for a local
      # endpoint - confirm.
      api_key: ${env.NVIDIA_API_KEY:} # TODO: don't need api key, code adjustments needed
  # Inline FAISS provider; its kvstore is the SQLite file faiss_store.db under
  # SQLITE_STORE_DIR (default ~/.llama/distributions/nvidia).
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
  # Inline llama-guard provider with an empty config.
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  # Inline meta-reference provider; its persistence store is the SQLite file
  # agents_store.db under SQLITE_STORE_DIR (default
  # ~/.llama/distributions/nvidia).
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db
  # Inline meta-reference telemetry provider. Each setting is read from an
  # environment variable with the default shown after the colon.
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      # NOTE(review): this path is keyed on SQLITE_DB_PATH (a full file path),
      # while the other stores in this file use SQLITE_STORE_DIR - confirm
      # this is intended.
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db}
  # Inline meta-reference provider with an empty config.
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
  # Two dataset providers, both with empty configs: remote huggingface and
  # inline localfs.
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config: {}
  - provider_id: localfs
    provider_type: inline::localfs
    config: {}
  # Three inline scoring providers: basic, llm-as-judge and braintrust.
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      # Read from OPENAI_API_KEY; the default after the colon is empty.
      openai_api_key: ${env.OPENAI_API_KEY:}
  # Tool runtime providers: two remote search providers (each with an API key
  # from the environment, empty by default, and max_results of 3) and two
  # inline providers with empty configs.
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
 # Metadata store: the SQLite file registry.db under SQLITE_STORE_DIR
 # (default ~/.llama/distributions/nvidia).
 metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
 # Single LLM entry bound to the `nvidia` provider; the model id is read from
 # INFERENCE_MODEL.
 models:
 - metadata: {}
   # Keys below are aligned with `metadata` so that all four belong to the
   # same sequence item.
   model_id: ${env.INFERENCE_MODEL}
   provider_id: nvidia
   model_type: llm
 # The remaining resource lists are all declared empty.
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
 tool_groups: []
		`@@ -1 +0,0 @@`
			`../../llama_stack/templates/nvidia/build.yaml`