From 27e07b44b557c8a37ff24c865b407faa33b67d76 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 15 Jan 2025 14:15:56 -0800 Subject: [PATCH] remove inline-nvidia templates --- distributions/inline-nvidia/build.yaml | 1 - distributions/inline-nvidia/compose.yaml | 58 ------------- distributions/inline-nvidia/run.yaml | 100 ----------------------- 3 files changed, 159 deletions(-) delete mode 120000 distributions/inline-nvidia/build.yaml delete mode 100644 distributions/inline-nvidia/compose.yaml delete mode 100644 distributions/inline-nvidia/run.yaml diff --git a/distributions/inline-nvidia/build.yaml b/distributions/inline-nvidia/build.yaml deleted file mode 120000 index 8903d2e57..000000000 --- a/distributions/inline-nvidia/build.yaml +++ /dev/null @@ -1 +0,0 @@ -../../llama_stack/templates/nvidia/build.yaml \ No newline at end of file diff --git a/distributions/inline-nvidia/compose.yaml b/distributions/inline-nvidia/compose.yaml deleted file mode 100644 index 644b7d23d..000000000 --- a/distributions/inline-nvidia/compose.yaml +++ /dev/null @@ -1,58 +0,0 @@ -services: - nim: - image: ${DOCKER_IMAGE:-nvcr.io/nim/meta/llama-3.1-8b-instruct:latest} - network_mode: "host" - volumes: - - nim-llm-cache:/opt/nim/.cache - ports: - - "8000:8000" - shm_size: 16G - environment: - - CUDA_VISIBLE_DEVICES=0 - - NIM_HTTP_API_PORT=8000 - - NIM_TRITON_LOG_VERBOSE=1 - - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}} - command: [] - deploy: - resources: - reservations: - devices: - - driver: nvidia - # that's the closest analogue to --gpus; provide - # an integer amount of devices or 'all' - count: 1 - # Devices are reserved using a list of capabilities, making - # capabilities the only required field. A device MUST - # satisfy all the requested capabilities for a successful - # reservation. - capabilities: [gpu] - runtime: nvidia - healthcheck: - test: ["CMD", "curl", "http://localhost:8000/v1/health/ready"] - interval: 5s - timeout: 5s - retries: 30 - start_period: 120s - llamastack: - depends_on: - - nim - image: distribution-nvidia:dev - network_mode: "host" - volumes: - - ~/.llama:/root/.llama - - ./run.yaml:/root/llamastack-run-nvidia.yaml - ports: - - "5000:5000" - environment: - - INFERENCE_MODEL=${INFERENCE_MODEL:-Llama3.1-8B-Instruct} - - NVIDIA_API_KEY=${NVIDIA_API_KEY:-} - entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml-config /root/llamastack-run-nvidia.yaml" - deploy: - restart_policy: - condition: on-failure - delay: 3s - max_attempts: 5 - window: 60s -volumes: - nim-llm-cache: - driver: local diff --git a/distributions/inline-nvidia/run.yaml b/distributions/inline-nvidia/run.yaml deleted file mode 100644 index e96a0429c..000000000 --- a/distributions/inline-nvidia/run.yaml +++ /dev/null @@ -1,100 +0,0 @@ -version: '2' -image_name: nvidia -conda_env: nvidia -apis: -- agents -- datasetio -- eval -- inference -- memory -- safety -- scoring -- telemetry -- tool_runtime -providers: - inference: - - provider_id: nvidia - provider_type: remote::nvidia - config: - url: http://localhost:8000 - api_key: ${env.NVIDIA_API_KEY} # TODO: don't need api key, code adjustments needed - memory: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: {} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: ${env.OTEL_SERVICE_NAME:llama-stack} - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db} - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: {} - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: {} - - provider_id: localfs - provider_type: inline::localfs - config: {} - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} - max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - - provider_id: memory-runtime - provider_type: inline::memory-runtime - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: nvidia - model_type: llm -shields: [] -memory_banks: [] -datasets: [] -scoring_fns: [] -eval_tasks: [] -tool_groups: []