Auto-generate distro yamls + docs (#468)

# What does this PR do?

Automatically generates
- build.yaml
- run.yaml
- run-with-safety.yaml
- parts of markdown docs

for the distributions.
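
For context, each distribution is now described once in Python and its YAMLs/docs are rendered from that single definition. A minimal consumption sketch, assuming only the `get_distribution_template()` entry point and the constructor fields visible in this diff (the hard-coded distro list is illustrative, taken from the test plan below):

```python
# Illustrative only: iterate over a few distro template packages and inspect
# the template object each one exposes. Attribute access assumes the
# pydantic-style model implied by the constructor calls in this PR.
import importlib

for distro in ["fireworks", "ollama", "vllm"]:  # names from the test plan
    module = importlib.import_module(f"llama_stack.templates.{distro}")
    template = module.get_distribution_template()
    print(template.name, "->", sorted(template.providers))
```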

## Test Plan

At this point, this only updates the YAMLs and the docs. Some testing
(especially with ollama and vllm) has been performed, but much more
thorough testing is needed.
Ashwin Bharambe authored on 2024-11-18 14:57:06 -08:00 (committed by GitHub)
commit 2a31163178, parent 0784284ab5
88 changed files with 3008 additions and 852 deletions


@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .fireworks import get_distribution_template  # noqa: F401
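
This package-level re-export keeps the import path short. A quick usage sketch (attribute access again assumes the pydantic-style model used in `fireworks.py` below):

```python
from llama_stack.templates.fireworks import get_distribution_template

template = get_distribution_template()
print(template.description)  # "Use Fireworks.AI for running LLM inference"
```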


@@ -1,11 +1,19 @@
 version: '2'
 name: fireworks
 distribution_spec:
-  description: Use Fireworks.ai for running LLM inference
+  description: Use Fireworks.AI for running LLM inference
   docker_image: null
   providers:
-    inference: remote::fireworks
+    inference:
+    - remote::fireworks
     memory:
     - inline::faiss
-    - remote::weaviate
-    safety: inline::llama-guard
-    agents: inline::meta-reference
-    telemetry: inline::meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety:
+    - inline::llama-guard
+    agents:
+    - inline::meta-reference
+    telemetry:
+    - inline::meta-reference
   image_type: conda
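
One way to sanity-check the regenerated file is to load it and assert that the provider entries are now lists rather than scalars. A quick PyYAML check (the file path is an assumption based on the package layout):

```python
# Verify the scalar -> list migration in the regenerated build.yaml.
import yaml

with open("llama_stack/templates/fireworks/build.yaml") as f:  # path assumed
    spec = yaml.safe_load(f)

providers = spec["distribution_spec"]["providers"]
assert providers["inference"] == ["remote::fireworks"]  # a list, not a scalar
assert "remote::pgvector" in providers["memory"]
```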


@@ -0,0 +1,60 @@
# Fireworks Distribution

The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.

{{ providers_table }}

{% if run_config_env_vars %}
### Environment Variables

The following environment variables can be configured:

{% for var, (default_value, description) in run_config_env_vars.items() %}
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
{% endfor %}
{% endif %}

{% if default_models %}
### Models

The following models are available by default:

{% for model in default_models %}
- `{{ model.model_id }}`
{% endfor %}
{% endif %}

### Prerequisite: API Keys

Make sure you have access to a Fireworks API Key. You can get one by visiting [fireworks.ai](https://fireworks.ai/).

## Running Llama Stack with Fireworks

You can do this via Conda (build the code yourself) or Docker (which has a pre-built image).

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./run.yaml:/root/my-run.yaml \
  llamastack/distribution-{{ name }} \
  /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
  --env FIREWORKS_API_KEY=$FIREWORKS_API_KEY
```

### Via Conda

```bash
llama stack build --template fireworks --image-type conda
llama stack run ./run.yaml \
  --port 5001 \
  --env FIREWORKS_API_KEY=$FIREWORKS_API_KEY
```
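
The markdown above is a Jinja template, so the per-distro docs are presumably produced by rendering it with the template's metadata. A self-contained sketch with jinja2; the variable values are stand-ins, and only the variable names come from the template itself:

```python
# Render doc_template.md the way the docs generator presumably does.
from types import SimpleNamespace

from jinja2 import Template

with open("doc_template.md") as f:  # path relative to the template package
    source = f.read()

markdown = Template(source).render(
    name="fireworks",
    providers_table="| API | Provider(s) |\n|-----|-------------|\n| inference | `remote::fireworks` |",
    run_config_env_vars={"FIREWORKS_API_KEY": ("", "Fireworks.AI API Key")},
    default_models=[SimpleNamespace(model_id="fireworks/llama-v3p1-8b-instruct")],
)
print(markdown)
```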


@@ -0,0 +1,60 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from pathlib import Path

from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings


def get_distribution_template() -> DistributionTemplate:
providers = {
"inference": ["remote::fireworks"],
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
    }

    inference_provider = Provider(
provider_id="fireworks",
provider_type="remote::fireworks",
config=FireworksImplConfig.sample_run_config(),
    )

    default_models = [ModelInput(model_id=m.provider_model_id) for m in MODEL_ALIASES]

    return DistributionTemplate(
name="fireworks",
distro_type="self_hosted",
description="Use Fireworks.AI for running LLM inference",
docker_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
default_models=default_models,
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
},
default_models=default_models,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
),
},
run_config_env_vars={
"LLAMASTACK_PORT": (
"5001",
"Port for the Llama Stack distribution server",
),
"FIREWORKS_API_KEY": (
"",
"Fireworks.AI API Key",
),
},
)
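
`provider_overrides` lets a run config swap in a fully configured `Provider` for one API while the other APIs keep their defaults. The merge semantics are presumably a per-API replacement; here is a tiny illustrative model of that behavior (not the library's actual generator code):

```python
# Illustrative: an override replaces the default provider list wholesale
# for that API; untouched APIs keep their defaults.
from typing import Any

def apply_overrides(
    defaults: dict[str, list[Any]],
    overrides: dict[str, list[Any]],
) -> dict[str, list[Any]]:
    merged = dict(defaults)
    merged.update(overrides)
    return merged

defaults = {"inference": ["remote::fireworks"], "safety": ["inline::llama-guard"]}
overrides = {"inference": ["<configured fireworks Provider>"]}
print(apply_overrides(defaults, overrides))
# {'inference': ['<configured fireworks Provider>'], 'safety': ['inline::llama-guard']}
```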


@@ -0,0 +1,91 @@
version: '2'
image_name: fireworks
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference
api_key: ${env.FIREWORKS_API_KEY}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
models:
- metadata: {}
model_id: fireworks/llama-v3p1-8b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p1-70b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p1-405b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-1b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-3b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-11b-vision-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-90b-vision-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-guard-3-8b
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-guard-3-11b-vision
provider_id: null
provider_model_id: null
shields:
- params: null
shield_id: meta-llama/Llama-Guard-3-8B
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
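
The run.yaml above leans on `${env.VAR}` and `${env.VAR:default}` placeholders (see `FIREWORKS_API_KEY` and `SQLITE_STORE_DIR`). The stack resolves these itself; the resolver below is only an illustration of the placeholder syntax, not the actual implementation:

```python
# Illustrative resolver for ${env.VAR} and ${env.VAR:default} placeholders.
import os
import re

_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")

def resolve_env(value: str) -> str:
    def _sub(match: re.Match) -> str:
        var, default = match.group(1), match.group(2)
        if var in os.environ:
            return os.environ[var]
        if default is not None:
            return default  # fall back to the text after ':'
        raise KeyError(f"environment variable {var} is not set")
    return _ENV_PATTERN.sub(_sub, value)

# With SQLITE_STORE_DIR unset, the default after ':' is used:
print(resolve_env("${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db"))
```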