Update more distribution docs to be simpler and partially codegen'ed

This commit is contained in:
Ashwin Bharambe 2024-11-20 14:44:04 -08:00
parent e84d4436b5
commit 2411a44833
51 changed files with 1188 additions and 291 deletions

View file

@@ -1,45 +0,0 @@
version: '2'
image_name: local
name: bedrock
docker_image: null
conda_env: local
apis:
- shields
- agents
- models
- memory
- memory_banks
- inference
- safety
providers:
  inference:
  - provider_id: bedrock0
    provider_type: remote::bedrock
    config:
      aws_access_key_id: <AWS_ACCESS_KEY_ID>
      aws_secret_access_key: <AWS_SECRET_ACCESS_KEY>
      aws_session_token: <AWS_SESSION_TOKEN>
      region_name: <AWS_REGION>
  memory:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config: {}
  safety:
  - provider_id: bedrock0
    provider_type: remote::bedrock
    config:
      aws_access_key_id: <AWS_ACCESS_KEY_ID>
      aws_secret_access_key: <AWS_SECRET_ACCESS_KEY>
      aws_session_token: <AWS_SESSION_TOKEN>
      region_name: <AWS_REGION>
  agents:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        db_path: ~/.llama/runtime/kvstore.db
  telemetry:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config: {}
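
A minimal sketch of how this removed bedrock config could be sanity-checked before use, assuming PyYAML is installed and the file is saved locally as run.yaml (a hypothetical path); the real server does its own validation, so this is only illustrative:

import sys

import yaml  # PyYAML; assumed to be available in the stack environment

def check_bedrock_placeholders(path: str = "run.yaml") -> None:
    """Exit with an error if any remote::bedrock provider still has <PLACEHOLDER> values."""
    with open(path) as f:
        config = yaml.safe_load(f)
    for api in ("inference", "safety"):
        for provider in config["providers"].get(api, []):
            if provider["provider_type"] != "remote::bedrock":
                continue
            for key, value in provider["config"].items():
                # values like <AWS_ACCESS_KEY_ID> mean the user never filled in credentials
                if isinstance(value, str) and value.startswith("<"):
                    sys.exit(f"{api}/{provider['provider_id']}: fill in {key} before running the stack")

if __name__ == "__main__":
    check_bedrock_placeholders()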

View file

@@ -0,0 +1 @@
../../llama_stack/templates/bedrock/run.yaml

View file

@@ -1 +0,0 @@
../../llama_stack/templates/databricks/build.yaml

View file

@@ -1,4 +1,32 @@
{
"hf-serverless": [
"aiohttp",
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"together": [
"aiosqlite",
"blobfile",
@@ -26,6 +54,33 @@
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"vllm-gpu": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"vllm",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"remote-vllm": [
"aiosqlite",
"blobfile",
@@ -108,6 +163,33 @@
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"bedrock": [
"aiosqlite",
"blobfile",
"boto3",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"meta-reference-gpu": [
"accelerate",
"aiosqlite",
@@ -167,5 +249,33 @@
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"hf-endpoint": [
"aiohttp",
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
]
}
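
A minimal sketch of how these per-distribution dependency lists could be consumed, assuming the JSON above is saved as distribution_dependencies.json (a hypothetical filename). Entries such as "sentence-transformers --no-deps" and "torch --index-url ..." carry their own pip flags, so each entry becomes a separate pip invocation:

import json
import shlex
import subprocess
import sys

def install(distro: str, requirements_path: str = "distribution_dependencies.json") -> None:
    # look up the dependency list for the chosen distribution, e.g. "bedrock" or "remote-vllm"
    with open(requirements_path) as f:
        deps = json.load(f)[distro]
    for entry in deps:
        # split each entry so embedded flags (--no-deps, --index-url ...) are passed through to pip
        subprocess.check_call([sys.executable, "-m", "pip", "install", *shlex.split(entry)])

if __name__ == "__main__":
    install(sys.argv[1] if len(sys.argv) > 1 else "bedrock")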

View file

@@ -1 +0,0 @@
../../llama_stack/templates/hf-endpoint/build.yaml

View file

@@ -1 +0,0 @@
../../llama_stack/templates/hf-serverless/build.yaml

View file

@@ -1 +0,0 @@
../../llama_stack/templates/ollama/build.yaml

View file

@@ -1,48 +0,0 @@
services:
  ollama:
    image: ollama/ollama:latest
    network_mode: "host"
    volumes:
    - ollama:/root/.ollama  # keep models in the named docker volume so they persist and load quickly
    ports:
    - "11434:11434"
    devices:
    - nvidia.com/gpu=all
    environment:
    - CUDA_VISIBLE_DEVICES=0
    command: []
    deploy:
      resources:
        reservations:
          devices:
          - driver: nvidia
            # that's the closest analogue to --gpus; provide
            # an integer amount of devices or 'all'
            count: 1
            # Devices are reserved using a list of capabilities, making
            # capabilities the only required field. A device MUST
            # satisfy all the requested capabilities for a successful
            # reservation.
            capabilities: [gpu]
    runtime: nvidia
  llamastack:
    depends_on:
    - ollama
    image: llamastack/distribution-ollama
    network_mode: "host"
    volumes:
    - ~/.llama:/root/.llama
    # Link to the ollama run.yaml file
    - ./run.yaml:/root/llamastack-run-ollama.yaml
    ports:
    - "5000:5000"
    # Hack: wait for the ollama server to start before launching the stack server
    entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-ollama.yaml"
    deploy:
      restart_policy:
        condition: on-failure
        delay: 3s
        max_attempts: 5
        window: 60s
volumes:
  ollama:
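
The entrypoint above papers over startup ordering with a fixed sleep 60. A minimal sketch of a polling-based readiness check that could replace it, assuming Ollama is reachable on http://127.0.0.1:11434 as configured with host networking above:

import time
import urllib.error
import urllib.request

def wait_for_ollama(url: str = "http://127.0.0.1:11434", timeout_s: int = 120) -> None:
    # poll the Ollama HTTP endpoint until it answers, instead of sleeping a fixed 60 seconds
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5):
                return  # server answered; safe to start the llamastack server
        except (urllib.error.URLError, OSError):
            time.sleep(2)
    raise TimeoutError(f"Ollama did not become ready at {url} within {timeout_s}s")

if __name__ == "__main__":
    wait_for_ollama()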

View file

@@ -1,46 +0,0 @@
version: '2'
image_name: local
docker_image: null
conda_env: local
apis:
- shields
- agents
- models
- memory
- memory_banks
- inference
- safety
providers:
  inference:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:http://127.0.0.1:11434}
  safety:
  - provider_id: meta0
    provider_type: inline::llama-guard
    config:
      excluded_categories: []
  memory:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config: {}
  agents:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config:
      persistence_store:
        namespace: null
        type: sqlite
        db_path: ~/.llama/runtime/kvstore.db
  telemetry:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config: {}
models:
- model_id: ${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}
  provider_id: ollama
- model_id: ${env.SAFETY_MODEL:Llama-Guard-3-1B}
  provider_id: ollama
shields:
- shield_id: ${env.SAFETY_MODEL:Llama-Guard-3-1B}
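
This run.yaml uses ${env.VAR:default} placeholders (OLLAMA_URL, INFERENCE_MODEL, SAFETY_MODEL). A minimal sketch of how that substitution could be resolved against the environment; the server performs its own substitution, so this is only illustrative:

import os
import re

# matches ${env.VAR} and ${env.VAR:default}; the default may itself contain colons (e.g. a URL)
_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z0-9_]+)(?::([^}]*))?\}")

def resolve_env(value: str) -> str:
    def _sub(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        # fall back to the inline default when the variable is not set
        return os.environ.get(name, default if default is not None else "")
    return _ENV_PATTERN.sub(_sub, value)

if __name__ == "__main__":
    print(resolve_env("${env.OLLAMA_URL:http://127.0.0.1:11434}"))
    print(resolve_env("${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}"))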