diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7e05c683a..5e19e73b7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -12,6 +12,11 @@ We actively welcome your pull requests.
 5. Make sure your code lints.
 6. If you haven't already, complete the Contributor License Agreement ("CLA").
 
+
+### Updating Provider Configurations
+
+If you have made changes to a provider's configuration in any form (introducing a new config key, changing the models, etc.), run `python llama_stack/scripts/distro_codegen.py` to re-generate the various YAML files as well as the documentation. Do not change the `docs/source/.../distributions/` files manually, as they are auto-generated.
+
 ### Building the Documentation
 
 If you are making changes to the documentation at [https://llama-stack.readthedocs.io/en/latest/](https://llama-stack.readthedocs.io/en/latest/), you can use the following command to build the documentation and preview your changes. You will need [Sphinx](https://www.sphinx-doc.org/en/master/) and the readthedocs theme.
diff --git a/distributions/fireworks/run.yaml b/distributions/fireworks/run.yaml
deleted file mode 100644
index 8d3316257..000000000
--- a/distributions/fireworks/run.yaml
+++ /dev/null
@@ -1,91 +0,0 @@
-version: '2'
-image_name: fireworks
-docker_image: null
-conda_env: null
-apis:
-- agents
-- inference
-- memory
-- safety
-- telemetry
-providers:
-  inference:
-  - provider_id: fireworks
-    provider_type: remote::fireworks
-    config:
-      url: https://api.fireworks.ai/inference
-      api_key: ${env.FIREWORKS_API_KEY}
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
-models:
-- metadata: {}
-  model_id: fireworks/llama-v3p1-8b-instruct
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: fireworks/llama-v3p1-70b-instruct
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: fireworks/llama-v3p1-405b-instruct
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: fireworks/llama-v3p2-1b-instruct
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: fireworks/llama-v3p2-3b-instruct
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: fireworks/llama-v3p2-11b-vision-instruct
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: fireworks/llama-v3p2-90b-vision-instruct
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: fireworks/llama-guard-3-8b
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: fireworks/llama-guard-3-11b-vision
-  provider_id: null
-  provider_model_id: null
-shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
diff --git a/distributions/fireworks/run.yaml b/distributions/fireworks/run.yaml
new file mode 120000
index 000000000..532e0e2a8
--- /dev/null
+++ b/distributions/fireworks/run.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/fireworks/run.yaml
\ No newline at end of file
diff --git a/distributions/meta-reference-gpu/run-with-safety.yaml b/distributions/meta-reference-gpu/run-with-safety.yaml
deleted file mode 100644
index 7d01159df..000000000
--- a/distributions/meta-reference-gpu/run-with-safety.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-version: '2'
-image_name: meta-reference-gpu
-docker_image: null
-conda_env: null
-apis:
-- agents
-- inference
-- memory
-- safety
-- telemetry
-providers:
-  inference:
-  - provider_id: meta-reference-inference
-    provider_type: inline::meta-reference
-    config:
-      model: ${env.INFERENCE_MODEL}
-      max_seq_len: 4096
-      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
-  - provider_id: meta-reference-safety
-    provider_type: inline::meta-reference
-    config:
-      model: ${env.SAFETY_MODEL}
-      max_seq_len: 4096
-      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: meta-reference-inference
-  provider_model_id: null
-- metadata: {}
-  model_id: ${env.SAFETY_MODEL}
-  provider_id: meta-reference-safety
-  provider_model_id: null
-shields:
-- params: null
-  shield_id: ${env.SAFETY_MODEL}
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
diff --git a/distributions/meta-reference-gpu/run-with-safety.yaml b/distributions/meta-reference-gpu/run-with-safety.yaml
new file mode 120000
index 000000000..4c5483425
--- /dev/null
+++ b/distributions/meta-reference-gpu/run-with-safety.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
\ No newline at end of file
diff --git a/distributions/meta-reference-gpu/run.yaml b/distributions/meta-reference-gpu/run.yaml
deleted file mode 100644
index c67ba60cd..000000000
--- a/distributions/meta-reference-gpu/run.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-version: '2'
-image_name: meta-reference-gpu
-docker_image: null
-conda_env: null
-apis:
-- agents
-- inference
-- memory
-- safety
-- telemetry
-providers:
-  inference:
-  - provider_id: meta-reference-inference
-    provider_type: inline::meta-reference
-    config:
-      model: ${env.INFERENCE_MODEL}
-      max_seq_len: 4096
-      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: meta-reference-inference
-  provider_model_id: null
-shields: []
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
diff --git a/distributions/meta-reference-gpu/run.yaml b/distributions/meta-reference-gpu/run.yaml
new file mode 120000
index 000000000..d680186ab
--- /dev/null
+++ b/distributions/meta-reference-gpu/run.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/meta-reference-gpu/run.yaml
\ No newline at end of file
diff --git a/distributions/ollama/run-with-safety.yaml b/distributions/ollama/run-with-safety.yaml
deleted file mode 100644
index d0f657377..000000000
--- a/distributions/ollama/run-with-safety.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-version: '2'
-image_name: ollama
-docker_image: null
-conda_env: null
-apis:
-- agents
-- inference
-- memory
-- safety
-- telemetry
-providers:
-  inference:
-  - provider_id: ollama
-    provider_type: remote::ollama
-    config:
-      url: ${env.OLLAMA_URL:http://localhost:11434}
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: ollama
-  provider_model_id: null
-- metadata: {}
-  model_id: ${env.SAFETY_MODEL}
-  provider_id: ollama
-  provider_model_id: null
-shields:
-- params: null
-  shield_id: ${env.SAFETY_MODEL}
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
diff --git a/distributions/ollama/run-with-safety.yaml b/distributions/ollama/run-with-safety.yaml
new file mode 120000
index 000000000..5695b49e7
--- /dev/null
+++ b/distributions/ollama/run-with-safety.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/ollama/run-with-safety.yaml
\ No newline at end of file
diff --git a/distributions/ollama/run.yaml b/distributions/ollama/run.yaml
deleted file mode 100644
index c4003006b..000000000
--- a/distributions/ollama/run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-version: '2'
-image_name: ollama
-docker_image: null
-conda_env: null
-apis:
-- agents
-- inference
-- memory
-- safety
-- telemetry
-providers:
-  inference:
-  - provider_id: ollama
-    provider_type: remote::ollama
-    config:
-      url: ${env.OLLAMA_URL:http://localhost:11434}
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: ollama
-  provider_model_id: null
-shields: []
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
diff --git a/distributions/ollama/run.yaml b/distributions/ollama/run.yaml
new file mode 120000
index 000000000..b008b1bf4
--- /dev/null
+++ b/distributions/ollama/run.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/ollama/run.yaml
\ No newline at end of file
diff --git a/distributions/remote-vllm/run-with-safety.yaml b/distributions/remote-vllm/run-with-safety.yaml
deleted file mode 100644
index 075cd793f..000000000
--- a/distributions/remote-vllm/run-with-safety.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-version: '2'
-image_name: remote-vllm
-docker_image: null
-conda_env: null
-apis:
-- agents
-- inference
-- memory
-- safety
-- telemetry
-providers:
-  inference:
-  - provider_id: vllm-inference
-    provider_type: remote::vllm
-    config:
-      url: ${env.VLLM_URL}
-      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-      api_token: ${env.VLLM_API_TOKEN:fake}
-  - provider_id: vllm-safety
-    provider_type: remote::vllm
-    config:
-      url: ${env.SAFETY_VLLM_URL}
-      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-      api_token: ${env.VLLM_API_TOKEN:fake}
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: vllm-inference
-  provider_model_id: null
-- metadata: {}
-  model_id: ${env.SAFETY_MODEL}
-  provider_id: vllm-safety
-  provider_model_id: null
-shields:
-- params: null
-  shield_id: ${env.SAFETY_MODEL}
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
diff --git a/distributions/remote-vllm/run-with-safety.yaml b/distributions/remote-vllm/run-with-safety.yaml
new file mode 120000
index 000000000..b2c3c36da
--- /dev/null
+++ b/distributions/remote-vllm/run-with-safety.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/remote-vllm/run-with-safety.yaml
\ No newline at end of file
diff --git a/distributions/remote-vllm/run.yaml b/distributions/remote-vllm/run.yaml
deleted file mode 100644
index da45acee2..000000000
--- a/distributions/remote-vllm/run.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-version: '2'
-image_name: remote-vllm
-docker_image: null
-conda_env: null
-apis:
-- agents
-- inference
-- memory
-- safety
-- telemetry
-providers:
-  inference:
-  - provider_id: vllm-inference
-    provider_type: remote::vllm
-    config:
-      url: ${env.VLLM_URL}
-      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-      api_token: ${env.VLLM_API_TOKEN:fake}
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: vllm-inference
-  provider_model_id: null
-shields: []
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
diff --git a/distributions/remote-vllm/run.yaml b/distributions/remote-vllm/run.yaml
new file mode 120000
index 000000000..ac70c0e6a
--- /dev/null
+++ b/distributions/remote-vllm/run.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/remote-vllm/run.yaml
\ No newline at end of file
diff --git a/distributions/tgi/run-with-safety.yaml b/distributions/tgi/run-with-safety.yaml
deleted file mode 100644
index b1f12cc88..000000000
--- a/distributions/tgi/run-with-safety.yaml
+++ /dev/null
@@ -1,66 +0,0 @@
-version: '2'
-image_name: tgi
-docker_image: llamastack/distribution-tgi:test-0.0.52rc3
-conda_env: null
-apis:
-- agents
-- inference
-- memory
-- safety
-- telemetry
-providers:
-  inference:
-  - provider_id: tgi-inference
-    provider_type: remote::tgi
-    config:
-      url: ${env.TGI_URL}
-  - provider_id: tgi-safety
-    provider_type: remote::tgi
-    config:
-      url: ${env.TGI_SAFETY_URL}
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: tgi-inference
-  provider_model_id: null
-- metadata: {}
-  model_id: ${env.SAFETY_MODEL}
-  provider_id: tgi-safety
-  provider_model_id: null
-shields:
-- params: null
-  shield_id: ${env.SAFETY_MODEL}
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
diff --git a/distributions/tgi/run-with-safety.yaml b/distributions/tgi/run-with-safety.yaml
new file mode 120000
index 000000000..62d26708e
--- /dev/null
+++ b/distributions/tgi/run-with-safety.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/tgi/run-with-safety.yaml
\ No newline at end of file
diff --git a/distributions/tgi/run.yaml b/distributions/tgi/run.yaml
deleted file mode 100644
index 5571beabd..000000000
--- a/distributions/tgi/run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-version: '2'
-image_name: tgi
-docker_image: llamastack/distribution-tgi:test-0.0.52rc3
-conda_env: null
-apis:
-- agents
-- inference
-- memory
-- safety
-- telemetry
-providers:
-  inference:
-  - provider_id: tgi-inference
-    provider_type: remote::tgi
-    config:
-      url: ${env.TGI_URL}
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: tgi-inference
-  provider_model_id: null
-shields: []
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
diff --git a/distributions/tgi/run.yaml b/distributions/tgi/run.yaml
new file mode 120000
index 000000000..f3cc3a502
--- /dev/null
+++ b/distributions/tgi/run.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/tgi/run.yaml
\ No newline at end of file
diff --git a/distributions/together/run.yaml b/distributions/together/run.yaml
deleted file mode 100644
index cc3c890f4..000000000
--- a/distributions/together/run.yaml
+++ /dev/null
@@ -1,87 +0,0 @@
-version: '2'
-image_name: together
-docker_image: null
-conda_env: null
-apis:
-- agents
-- inference
-- memory
-- safety
-- telemetry
-providers:
-  inference:
-  - provider_id: together
-    provider_type: remote::together
-    config:
-      url: https://api.together.xyz/v1
-      api_key: ${env.TOGETHER_API_KEY}
-  memory:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
-models:
-- metadata: {}
-  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: meta-llama/Meta-Llama-Guard-3-8B
-  provider_id: null
-  provider_model_id: null
-- metadata: {}
-  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
-  provider_id: null
-  provider_model_id: null
-shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-1B
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
-datasets: []
-scoring_fns: []
-eval_tasks: []
diff --git a/distributions/together/run.yaml b/distributions/together/run.yaml
new file mode 120000
index 000000000..102d9866e
--- /dev/null
+++ b/distributions/together/run.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/together/run.yaml
\ No newline at end of file
diff --git a/llama_stack/providers/remote/inference/ollama/docker_compose.yaml b/llama_stack/providers/remote/inference/ollama/docker_compose.yaml
deleted file mode 100644
index 9bb7a143b..000000000
--- a/llama_stack/providers/remote/inference/ollama/docker_compose.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-services:
-  ${SERVICE_NAME:-ollama}:
-    image: ollama/ollama:latest
-    ports:
-      - ${OLLAMA_PORT:-11434}:${OLLAMA_PORT:-11434}
-    volumes:
-      - $HOME/.ollama:/root/.ollama
-    devices:
-      - nvidia.com/gpu=all
-    runtime: nvidia
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://ollama:11434"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
-  ${SERVICE_NAME:-ollama}-init:
-    image: ollama/ollama
-    depends_on:
-      - ${SERVICE_NAME:-ollama}:
-        condition: service_healthy
-    environment:
-      - OLLAMA_HOST=ollama
-      - OLLAMA_MODELS=${OLLAMA_MODELS}
-    volumes:
-      - $HOME/.ollama:/root/.ollama
-    entrypoint: >
-      sh -c '
-      max_attempts=30;
-      attempt=0;
-
-      echo "Waiting for Ollama server...";
-      until curl -s http://ollama:11434 > /dev/null; do
-        attempt=$((attempt + 1));
-        if [ $attempt -ge $max_attempts ]; then
-          echo "Timeout waiting for Ollama server";
-          exit 1;
-        fi;
-        echo "Attempt $attempt: Server not ready yet...";
-        sleep 5;
-      done;
-
-      echo "Server ready. Pulling models...";
-
-      models="${OLLAMA_MODELS}";
-      for model in $models; do
-        echo "Pulling $model...";
ollama pull "$model"; then - echo "Failed to pull $model"; - exit 1; - fi; - done; - - echo "All models pulled successfully" - ' diff --git a/llama_stack/scripts/distro_codegen.py b/llama_stack/scripts/distro_codegen.py index 2f41ec6ea..47d2dc41c 100644 --- a/llama_stack/scripts/distro_codegen.py +++ b/llama_stack/scripts/distro_codegen.py @@ -40,7 +40,7 @@ def process_template(template_dir: Path, progress) -> None: template = template_func() template.save_distribution( - yaml_output_dir=REPO_ROOT / "distributions" / template.name, + yaml_output_dir=REPO_ROOT / "llama_stack" / "templates" / template.name, doc_output_dir=REPO_ROOT / "docs/source/getting_started/distributions" / f"{template.distro_type}_distro", diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml new file mode 100644 index 000000000..8d3316257 --- /dev/null +++ b/llama_stack/templates/fireworks/run.yaml @@ -0,0 +1,91 @@ +version: '2' +image_name: fireworks +docker_image: null +conda_env: null +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference + api_key: ${env.FIREWORKS_API_KEY} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db +models: +- metadata: {} + model_id: fireworks/llama-v3p1-8b-instruct + provider_id: null + provider_model_id: null +- metadata: {} + model_id: fireworks/llama-v3p1-70b-instruct + provider_id: null + provider_model_id: null +- metadata: {} + model_id: fireworks/llama-v3p1-405b-instruct + provider_id: null + provider_model_id: null +- metadata: {} + model_id: fireworks/llama-v3p2-1b-instruct + provider_id: null + provider_model_id: null +- metadata: {} + model_id: fireworks/llama-v3p2-3b-instruct + provider_id: null + provider_model_id: null +- metadata: {} + model_id: fireworks/llama-v3p2-11b-vision-instruct + provider_id: null + provider_model_id: null +- metadata: {} + model_id: fireworks/llama-v3p2-90b-vision-instruct + provider_id: null + provider_model_id: null +- metadata: {} + model_id: fireworks/llama-guard-3-8b + provider_id: null + provider_model_id: null +- metadata: {} + model_id: fireworks/llama-guard-3-11b-vision + provider_id: null + provider_model_id: null +shields: +- params: null + shield_id: meta-llama/Llama-Guard-3-8B + provider_id: null + provider_shield_id: null +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml new file mode 100644 index 000000000..7d01159df --- /dev/null +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -0,0 +1,70 @@ +version: '2' +image_name: meta-reference-gpu +docker_image: null +conda_env: null +apis: +- agents 
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: meta-reference-inference
+    provider_type: inline::meta-reference
+    config:
+      model: ${env.INFERENCE_MODEL}
+      max_seq_len: 4096
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
+  - provider_id: meta-reference-safety
+    provider_type: inline::meta-reference
+    config:
+      model: ${env.SAFETY_MODEL}
+      max_seq_len: 4096
+      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: meta-reference-inference
+  provider_model_id: null
+- metadata: {}
+  model_id: ${env.SAFETY_MODEL}
+  provider_id: meta-reference-safety
+  provider_model_id: null
+shields:
+- params: null
+  shield_id: ${env.SAFETY_MODEL}
+  provider_id: null
+  provider_shield_id: null
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml
new file mode 100644
index 000000000..c67ba60cd
--- /dev/null
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -0,0 +1,56 @@
+version: '2'
+image_name: meta-reference-gpu
+docker_image: null
+conda_env: null
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: meta-reference-inference
+    provider_type: inline::meta-reference
+    config:
+      model: ${env.INFERENCE_MODEL}
+      max_seq_len: 4096
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: meta-reference-inference
+  provider_model_id: null
+shields: []
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
new file mode 100644
index 000000000..d0f657377
--- /dev/null
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -0,0 +1,62 @@
+version: '2'
+image_name: ollama
+docker_image: null
+conda_env: null
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: ollama
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:http://localhost:11434}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: ollama
+  provider_model_id: null
+- metadata: {}
+  model_id: ${env.SAFETY_MODEL}
+  provider_id: ollama
+  provider_model_id: null
+shields:
+- params: null
+  shield_id: ${env.SAFETY_MODEL}
+  provider_id: null
+  provider_shield_id: null
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
new file mode 100644
index 000000000..c4003006b
--- /dev/null
+++ b/llama_stack/templates/ollama/run.yaml
@@ -0,0 +1,54 @@
+version: '2'
+image_name: ollama
+docker_image: null
+conda_env: null
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: ollama
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:http://localhost:11434}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: ollama
+  provider_model_id: null
+shields: []
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
new file mode 100644
index 000000000..075cd793f
--- /dev/null
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -0,0 +1,70 @@
+version: '2'
+image_name: remote-vllm
+docker_image: null
+conda_env: null
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: vllm-inference
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL}
+      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
+      api_token: ${env.VLLM_API_TOKEN:fake}
+  - provider_id: vllm-safety
+    provider_type: remote::vllm
+    config:
+      url: ${env.SAFETY_VLLM_URL}
+      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
+      api_token: ${env.VLLM_API_TOKEN:fake}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: vllm-inference
+  provider_model_id: null
+- metadata: {}
+  model_id: ${env.SAFETY_MODEL}
+  provider_id: vllm-safety
+  provider_model_id: null
+shields:
+- params: null
+  shield_id: ${env.SAFETY_MODEL}
+  provider_id: null
+  provider_shield_id: null
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
new file mode 100644
index 000000000..da45acee2
--- /dev/null
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -0,0 +1,56 @@
+version: '2'
+image_name: remote-vllm
+docker_image: null
+conda_env: null
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: vllm-inference
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL}
+      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
+      api_token: ${env.VLLM_API_TOKEN:fake}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: vllm-inference
+  provider_model_id: null
+shields: []
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml
new file mode 100644
index 000000000..b1f12cc88
--- /dev/null
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@@ -0,0 +1,66 @@
+version: '2'
+image_name: tgi
+docker_image: llamastack/distribution-tgi:test-0.0.52rc3
+conda_env: null
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: tgi-inference
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL}
+  - provider_id: tgi-safety
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_SAFETY_URL}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: tgi-inference
+  provider_model_id: null
+- metadata: {}
+  model_id: ${env.SAFETY_MODEL}
+  provider_id: tgi-safety
+  provider_model_id: null
+shields:
+- params: null
+  shield_id: ${env.SAFETY_MODEL}
+  provider_id: null
+  provider_shield_id: null
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml
new file mode 100644
index 000000000..5571beabd
--- /dev/null
+++ b/llama_stack/templates/tgi/run.yaml
@@ -0,0 +1,54 @@
+version: '2'
+image_name: tgi
+docker_image: llamastack/distribution-tgi:test-0.0.52rc3
+conda_env: null
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: tgi-inference
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: tgi-inference
+  provider_model_id: null
+shields: []
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
new file mode 100644
index 000000000..cc3c890f4
--- /dev/null
+++ b/llama_stack/templates/together/run.yaml
@@ -0,0 +1,87 @@
+version: '2'
+image_name: together
+docker_image: null
+conda_env: null
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
+models:
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  provider_id: null
+  provider_model_id: null
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  provider_id: null
+  provider_model_id: null
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: null
+  provider_model_id: null
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  provider_id: null
+  provider_model_id: null
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  provider_id: null
+  provider_model_id: null
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  provider_id: null
+  provider_model_id: null
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-Guard-3-8B
+  provider_id: null
+  provider_model_id: null
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  provider_id: null
+  provider_model_id: null
+shields:
+- params: null
+  shield_id: meta-llama/Llama-Guard-3-1B
+  provider_id: null
+  provider_shield_id: null
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
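For reviewers trying this change locally, a minimal smoke test might look like the sketch below. It assumes a POSIX shell at the repository root and GNU coreutils; the script path and the `distributions/fireworks/run.yaml` symlink target are taken from the diff above, everything else is illustrative.

```bash
# Re-generate the distribution YAMLs and docs after touching a provider config,
# per the new CONTRIBUTING.md section.
python llama_stack/scripts/distro_codegen.py

# distributions/*/run.yaml are now symlinks into llama_stack/templates/;
# readlink -f should resolve to the template file, not error out.
readlink -f distributions/fireworks/run.yaml

# If codegen is in sync with the checked-in templates and docs,
# the working tree should show no unexpected modifications.
git status --short
```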