feat(distro): fork off a starter-gpu distribution

2025-12-18 10:09:47 +00:00 · 2025-08-22 15:20:35 -07:00 · 2025-08-22 15:20:35 -07:00 · c95db3a795
commit c95db3a795
parent 4434fcc2c3
15 changed files with 522 additions and 31 deletions
--- a/llama_stack/distributions/starter-gpu/init.py
+++ b/llama_stack/distributions/starter-gpu/init.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .starter_gpu import get_distribution_template  # noqa: F401
--- a/llama_stack/distributions/starter-gpu/build.yaml
+++ b/llama_stack/distributions/starter-gpu/build.yaml
@ -0,0 +1,59 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers.
+    This distribution is intended for GPU-enabled environments.
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    telemetry:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::torchtune-gpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- sqlalchemy[asyncio]
--- a/llama_stack/distributions/starter-gpu/run.yaml
+++ b/llama_stack/distributions/starter-gpu/run.yaml
@ -0,0 +1,238 @@
+version: 2
+image_name: starter-gpu
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/responses_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/trace_store.db
+      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
+  post_training:
+  - provider_id: torchtune-gpu
+    provider_type: inline::torchtune-gpu
+    config:
+      checkpoint_format: meta
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/meta_reference_eval.db
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/batches.db
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/inference_store.db
+models: []
+shields:
+- shield_id: llama-guard
+  provider_id: ${env.SAFETY_MODEL:+llama-guard}
+  provider_shield_id: ${env.SAFETY_MODEL:=}
+- shield_id: code-scanner
+  provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+  provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+server:
+  port: 8321
--- a/llama_stack/distributions/starter-gpu/starter_gpu.py
+++ b/llama_stack/distributions/starter-gpu/starter_gpu.py
@ -0,0 +1,22 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from llama_stack.distributions.template import BuildProvider, DistributionTemplate
+
+from ..starter.starter import get_distribution_template as get_starter_distribution_template
+
+
+def get_distribution_template() -> DistributionTemplate:
+    template = get_starter_distribution_template()
+    name = "starter-gpu"
+    template.name = name
+    template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
+
+    template.providers["post_training"] = [
+        BuildProvider(provider_type="inline::torchtune-gpu"),
+    ]
+    return template