more progress on auto-generation

Ashwin Bharambe 2024-11-15 09:35:38 -08:00
parent cfa913fdd5
commit e4509cb568
10 changed files with 309 additions and 73 deletions

View file

@@ -33,7 +33,7 @@ providers:
persistence_store:
namespace: null
type: sqlite
db_path: ~/.llama/runtime/kvstore.db
db_path: ${env.SQLITE_STORE_DIR:/home/ashwin/.llama/runtime}/kvstore.db
telemetry:
- provider_id: meta0
provider_type: inline::meta-reference
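
The `${env.VAR:default}` syntax used in `db_path` is an environment-variable placeholder with an inline default, left unresolved in the generated run.yaml. As a rough sketch of the intended substitution semantics (the real resolution happens inside Llama Stack's config loading; `resolve_env_placeholder` below is a hypothetical helper, not part of this commit):

```python
import os
import re

# Hypothetical helper, for illustration only: expands "${env.VAR:default}"
# placeholders the way the run.yaml above expects them to behave.
_PLACEHOLDER = re.compile(r"\$\{env\.([A-Za-z0-9_]+):([^}]*)\}")


def resolve_env_placeholder(value: str) -> str:
    # Use the environment variable if set, otherwise fall back to the
    # default given after the colon.
    return _PLACEHOLDER.sub(
        lambda m: os.environ.get(m.group(1), m.group(2)), value
    )


# resolve_env_placeholder("${env.SQLITE_STORE_DIR:~/.llama/runtime}/kvstore.db")
# -> "~/.llama/runtime/kvstore.db" unless SQLITE_STORE_DIR is set
```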

View file

@@ -14,9 +14,9 @@ class MetaReferenceAgentsImplConfig(BaseModel):
persistence_store: KVStoreConfig = Field(default=SqliteKVStoreConfig())
@classmethod
def sample_dict(cls):
def sample_run_config(cls):
return {
"persistence_store": SqliteKVStoreConfig.sample_dict(
"persistence_store": SqliteKVStoreConfig.sample_run_config(
db_name="agents_store.db"
),
}
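
This is part of a broader rename of `sample_dict` to `sample_run_config` across provider configs. A hedged sketch of how the hook would be exercised (the module path is an assumption based on the `inline::meta-reference` provider type, not shown in this diff):

```python
# Illustrative only; module path assumed, not part of this commit.
from llama_stack.providers.inline.agents.meta_reference.config import (
    MetaReferenceAgentsImplConfig,
)

sample = MetaReferenceAgentsImplConfig.sample_run_config()
# Per the diff, this nests SqliteKVStoreConfig.sample_run_config(
#     db_name="agents_store.db") under the "persistence_store" key.
```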

View file

@@ -35,7 +35,7 @@ class VLLMConfig(BaseModel):
)
@classmethod
def sample_dict(cls):
def sample_run_config(cls):
return {
"model": "${env.VLLM_INFERENCE_MODEL:Llama3.2-3B-Instruct}",
"tensor_parallel_size": "${env.VLLM_TENSOR_PARALLEL_SIZE:1}",

View file

@@ -11,9 +11,3 @@ from pydantic import BaseModel
class LlamaGuardConfig(BaseModel):
excluded_categories: List[str] = []
@classmethod
def sample_dict(cls):
return {
"excluded_categories": [],
}

View file

@@ -4,11 +4,34 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Optional
from llama_stack.distribution.datatypes import RemoteProviderConfig
from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig
DEFAULT_OLLAMA_PORT = 11434
class OllamaImplConfig(RemoteProviderConfig):
port: int = 11434
port: int = DEFAULT_OLLAMA_PORT
@classmethod
def sample_docker_compose_config(cls) -> Optional[DockerComposeServiceConfig]:
return DockerComposeServiceConfig(
image="ollama/ollama:latest",
volumes=["$HOME/.ollama:/root/.ollama"],
devices=["nvidia.com/gpu=all"],
deploy={
"resources": {
"reservations": {
"devices": [{"driver": "nvidia", "capabilities": ["gpu"]}]
}
}
},
runtime="nvidia",
ports=[f"{DEFAULT_OLLAMA_PORT}:{DEFAULT_OLLAMA_PORT}"],
)
async def get_adapter_impl(config: RemoteProviderConfig, _deps):
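
A hedged sketch of rendering the new `sample_docker_compose_config()` into a compose snippet (not part of this commit; the import path for `OllamaImplConfig` is assumed from the surrounding package layout, and `DockerComposeServiceConfig` is the pydantic model added later in this commit):

```python
# Illustrative only; the import path is an assumption.
import yaml

from llama_stack.providers.remote.inference.ollama import OllamaImplConfig

service = OllamaImplConfig.sample_docker_compose_config()
print(
    yaml.safe_dump(
        {"services": {"ollama": service.model_dump(exclude_none=True)}},
        sort_keys=False,
    )
)
# Expect image: ollama/ollama:latest, the NVIDIA GPU reservation, and the
# 11434:11434 port mapping shown above.
```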

View file

@@ -9,6 +9,11 @@ from typing import Optional
from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field
from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig
DEFAULT_VLLM_PORT = 8000
@json_schema_type
class VLLMInferenceAdapterConfig(BaseModel):
@@ -26,10 +31,50 @@ class VLLMInferenceAdapterConfig(BaseModel):
)
@classmethod
def sample_dict(cls):
# TODO: we may need two modes, one for conda and one for docker
def sample_run_config(
cls,
url: str = "${env.VLLM_URL:http://host.docker.internal:5100/v1}",
):
return {
"url": "${env.VLLM_URL:http://host.docker.internal:5100/v1}",
"url": url,
"max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
"api_token": "${env.VLLM_API_TOKEN:fake}",
}
@classmethod
def sample_docker_compose_config(
cls,
port: int = DEFAULT_VLLM_PORT,
cuda_visible_devices: str = "0",
model: str = "meta-llama/Llama-3.2-3B-Instruct",
) -> Optional[DockerComposeServiceConfig]:
return DockerComposeServiceConfig(
image="vllm/vllm-openai:latest",
volumes=["$HOME/.cache/huggingface:/root/.cache/huggingface"],
devices=["nvidia.com/gpu=all"],
deploy={
"resources": {
"reservations": {
"devices": [{"driver": "nvidia", "capabilities": ["gpu"]}]
}
}
},
runtime="nvidia",
ports=[f"{port}:{port}"],
environment={
"CUDA_VISIBLE_DEVICES": cuda_visible_devices,
"HUGGING_FACE_HUB_TOKEN": "$HF_TOKEN",
},
command=(
" ".join(
[
"--gpu-memory-utilization 0.75",
f"--model {model}",
"--enforce-eager",
"--max-model-len 8192",
"--max-num-seqs 16",
f"--port {port}",
]
)
),
)
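
The vLLM adapter now carries both generation hooks introduced in this commit: `sample_run_config()` feeds run.yaml (env placeholders left unresolved) and `sample_docker_compose_config()` feeds compose.yaml (concrete port, GPU pinning, and model). A short usage sketch mirroring the safety-model instance wired up further down (values are illustrative; the import path is the one used later in this commit):

```python
# Illustrative only; not part of this commit.
from llama_stack.providers.remote.inference.vllm.config import (
    VLLMInferenceAdapterConfig,
)

# run.yaml side: keep the env placeholder so users can override at runtime.
run_cfg = VLLMInferenceAdapterConfig.sample_run_config(
    url="${env.SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}",
)

# compose.yaml side: pin the port, GPU, and model for docker compose.
compose_cfg = VLLMInferenceAdapterConfig.sample_docker_compose_config(
    port=5101,
    cuda_visible_devices="1",
    model="${env.SAFETY_MODEL:Llama-Guard-3-1B}",
)
```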

View file

@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

View file

@@ -0,0 +1,29 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Dict, List, Optional
from pydantic import BaseModel
class LiteralString(str):
pass # Marker class for strings we want to format with ">" in YAML
class DockerComposeServiceConfig(BaseModel):
"""Configuration for a single service in docker-compose."""
image: str
volumes: Optional[List[str]] = None
network_mode: str = "bridge"
ports: Optional[List[str]] = None
devices: Optional[List[str]] = None
environment: Optional[Dict[str, str]] = None
command: Optional[str] = None
depends_on: Optional[List[str]] = None
deploy: Optional[Dict[str, Any]] = None
runtime: Optional[str] = None
entrypoint: Optional[str] = None
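
A minimal usage sketch of the new model (not part of this commit; the image, port, and environment values below are made up for illustration, only the import path matches this change):

```python
# Illustrative only; service values are made up.
import yaml

from llama_stack.providers.utils.docker.service_config import (
    DockerComposeServiceConfig,
)

svc = DockerComposeServiceConfig(
    image="redis:7",
    ports=["6379:6379"],
    environment={"REDIS_ARGS": "--save 60 1"},
)
# exclude_none keeps the emitted compose entry free of unset optional fields.
print(
    yaml.safe_dump(
        {"services": {"redis": svc.model_dump(exclude_none=True)}},
        sort_keys=False,
    )
)
```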

View file

@@ -37,7 +37,7 @@ class RedisKVStoreConfig(CommonConfig):
return f"redis://{self.host}:{self.port}"
@classmethod
def sample_dict(cls):
def sample_run_config(cls):
return {
"type": "redis",
"namespace": None,
@@ -54,7 +54,7 @@ class SqliteKVStoreConfig(CommonConfig):
)
@classmethod
def sample_dict(cls, db_name: str = "kvstore.db"):
def sample_run_config(cls, db_name: str = "kvstore.db"):
return {
"type": "sqlite",
"namespace": None,
@@ -72,7 +72,7 @@ class PostgresKVStoreConfig(CommonConfig):
table_name: str = "llamastack_kvstore"
@classmethod
def sample_dict(cls, table_name: str = "llamastack_kvstore"):
def sample_run_config(cls, table_name: str = "llamastack_kvstore"):
return {
"type": "postgres",
"namespace": None,

View file

@@ -4,19 +4,22 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from datetime import datetime
from io import StringIO
from pathlib import Path
from typing import Dict, List, Optional, Set
from typing import Any, Dict, List, Optional, Set, Tuple
import jinja2
import yaml
from pydantic import BaseModel
from pydantic import BaseModel, Field
from rich.console import Console
from rich.table import Table
from llama_stack.distribution.datatypes import (
Api,
BuildConfig,
DistributionSpec,
KVStoreConfig,
@@ -25,6 +28,12 @@ from llama_stack.distribution.datatypes import (
ShieldInput,
StackRunConfig,
)
from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.providers.remote.inference.vllm.config import (
VLLMInferenceAdapterConfig,
)
from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig
class DistributionTemplate(BaseModel):
@@ -36,12 +45,17 @@ class DistributionTemplate(BaseModel):
name: str
description: str
providers: Dict[str, List[str]]
run_config_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
compose_config_overrides: Dict[str, Dict[str, DockerComposeServiceConfig]] = Field(
default_factory=dict
)
default_models: List[ModelInput]
default_shields: Optional[List[ShieldInput]] = None
# Optional configuration
metadata_store: Optional[KVStoreConfig] = None
env_vars: Optional[Dict[str, str]] = None
docker_compose_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
docker_image: Optional[str] = None
@property
@@ -59,8 +73,42 @@ class DistributionTemplate(BaseModel):
image_type="conda", # default to conda, can be overridden
)
def run_config(self, provider_configs: Dict[str, List[Provider]]) -> StackRunConfig:
from datetime import datetime
def run_config(self) -> StackRunConfig:
provider_registry = get_provider_registry()
provider_configs = {}
for api_str, provider_types in self.providers.items():
if providers := self.run_config_overrides.get(api_str):
provider_configs[api_str] = providers
continue
provider_type = provider_types[0]
provider_id = provider_type.split("::")[-1]
api = Api(api_str)
if provider_type not in provider_registry[api]:
raise ValueError(
f"Unknown provider type: {provider_type} for API: {api_str}"
)
config_class = provider_registry[api][provider_type].config_class
assert (
config_class is not None
), f"No config class for provider type: {provider_type} for API: {api_str}"
config_class = instantiate_class_type(config_class)
if hasattr(config_class, "sample_run_config"):
config = config_class.sample_run_config()
else:
config = {}
provider_configs[api_str] = [
Provider(
provider_id=provider_id,
provider_type=provider_type,
config=config,
)
]
# Get unique set of APIs from providers
apis: Set[str] = set(self.providers.keys())
@@ -76,6 +124,70 @@ class DistributionTemplate(BaseModel):
shields=self.default_shields or [],
)
def docker_compose_config(self) -> Dict[str, Any]:
services = {}
provider_registry = get_provider_registry()
# Add provider services based on their sample_compose_config
for api_str, api_providers in self.providers.items():
if overrides := self.compose_config_overrides.get(api_str):
services |= overrides
continue
# only look at the first provider to get the compose config for now
# we may want to use `docker compose profiles` in the future
provider_type = api_providers[0]
provider_id = provider_type.split("::")[-1]
api = Api(api_str)
if provider_type not in provider_registry[api]:
raise ValueError(
f"Unknown provider type: {provider_type} for API: {api_str}"
)
config_class = provider_registry[api][provider_type].config_class
assert (
config_class is not None
), f"No config class for provider type: {provider_type} for API: {api_str}"
config_class = instantiate_class_type(config_class)
if not hasattr(config_class, "sample_docker_compose_config"):
continue
compose_config = config_class.sample_docker_compose_config()
services[provider_id] = compose_config
port = "${LLAMASTACK_PORT:-5001}"
# Add main llamastack service
llamastack_config = DockerComposeServiceConfig(
image=f"llamastack/distribution-{self.name}:latest",
depends_on=list(services.keys()),
volumes=[
"~/.llama:/root/.llama",
f"~/local/llama-stack/distributions/{self.name}/run.yaml:/root/llamastack-run-{self.name}.yaml",
],
ports=[f"{port}:{port}"],
environment={
k: v[0] for k, v in (self.docker_compose_env_vars or {}).items()
},
entrypoint=(
f'bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-{self.name}.yaml --port {port}"'
),
deploy={
"restart_policy": {
"condition": "on-failure",
"delay": "3s",
"max_attempts": 5,
"window": "60s",
}
},
)
services["llamastack"] = llamastack_config
return {
"services": {k: v.model_dump() for k, v in services.items()},
"volumes": {service_name: None for service_name in services.keys()},
}
def generate_markdown_docs(self) -> str:
"""Generate markdown documentation using both Jinja2 templates and rich tables."""
# First generate the providers table using rich
@@ -108,7 +220,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following
The following environment variables can be configured:
{% for var, description in env_vars.items() %}
{% for var, (value, description) in docker_compose_env_vars.items() %}
- `{{ var }}`: {{ description }}
{% endfor %}
{%- endif %}
@@ -122,29 +234,6 @@ $ cd distributions/{{ name }}
$ docker compose up
```
### Manual Configuration
You can also configure the distribution manually by creating a `run.yaml` file:
```yaml
version: '2'
image_name: {{ name }}
apis:
{% for api in providers.keys() %}
- {{ api }}
{% endfor %}
providers:
{% for api, provider_list in providers.items() %}
{{ api }}:
{% for provider in provider_list %}
- provider_id: {{ provider.lower() }}-0
provider_type: {{ provider }}
config: {}
{% endfor %}
{% endfor %}
```
## Models
The following models are configured by default:
@@ -170,7 +259,7 @@ The following safety shields are configured:
description=self.description,
providers=self.providers,
providers_table=providers_table,
env_vars=self.env_vars,
docker_compose_env_vars=self.docker_compose_env_vars,
default_models=self.default_models,
default_shields=self.default_shields,
)
@@ -178,29 +267,24 @@ The following safety shields are configured:
def save_distribution(self, output_dir: Path) -> None:
output_dir.mkdir(parents=True, exist_ok=True)
# Save build.yaml
build_config = self.build_config()
with open(output_dir / "build.yaml", "w") as f:
yaml.safe_dump(build_config.model_dump(), f, sort_keys=False)
# Save run.yaml template
# Create a minimal provider config for the template
provider_configs = {
api: [
Provider(
provider_id=f"{provider.lower()}-0",
provider_type=provider,
config={},
)
for provider in providers
]
for api, providers in self.providers.items()
}
run_config = self.run_config(provider_configs)
run_config = self.run_config()
serialized = run_config.model_dump()
with open(output_dir / "run.yaml", "w") as f:
yaml.safe_dump(run_config.model_dump(), f, sort_keys=False)
yaml.safe_dump(serialized, f, sort_keys=False)
# serialized_str = yaml.dump(serialized, sort_keys=False)
# env_vars = set()
# for match in re.finditer(r"\${env\.([A-Za-z0-9_-]+)}", serialized_str):
# env_vars.add(match.group(1))
docker_compose = self.docker_compose_config()
with open(output_dir / "compose.yaml", "w") as f:
yaml.safe_dump(docker_compose, f, sort_keys=False, default_flow_style=False)
# Save documentation
docs = self.generate_markdown_docs()
with open(output_dir / f"{self.name}.md", "w") as f:
f.write(docs)
@@ -217,21 +301,77 @@ The following safety shields are configured:
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
},
run_config_overrides={
"inference": [
Provider(
provider_id="vllm-0",
provider_type="remote::vllm",
config=VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:http://host.docker.internal:5100/v1}",
),
),
Provider(
provider_id="vllm-1",
provider_type="remote::vllm",
config=VLLMInferenceAdapterConfig.sample_run_config(
url="${env.SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}",
),
),
]
},
compose_config_overrides={
"inference": {
"vllm-0": VLLMInferenceAdapterConfig.sample_docker_compose_config(
port=5100,
cuda_visible_devices="0",
model="${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}",
),
"vllm-1": VLLMInferenceAdapterConfig.sample_docker_compose_config(
port=5101,
cuda_visible_devices="1",
model="${env.SAFETY_MODEL:Llama-Guard-3-1B}",
),
}
},
default_models=[
ModelInput(
model_id="${env.LLAMA_INFERENCE_MODEL:Llama3.1-8B-Instruct}"
model_id="${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}",
provider_id="vllm-0",
),
ModelInput(
model_id="${env.SAFETY_MODEL:Llama-Guard-3-1B}",
provider_id="vllm-1",
),
ModelInput(model_id="${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}"),
],
default_shields=[
ShieldInput(shield_id="${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}")
ShieldInput(shield_id="${env.SAFETY_MODEL:Llama-Guard-3-1B}")
],
env_vars={
"LLAMA_INFERENCE_VLLM_URL": "URL of the vLLM inference server",
"LLAMA_SAFETY_VLLM_URL": "URL of the vLLM safety server",
"MAX_TOKENS": "Maximum number of tokens for generation",
"LLAMA_INFERENCE_MODEL": "Name of the inference model to use",
"LLAMA_SAFETY_MODEL": "Name of the safety model to use",
docker_compose_env_vars={
# these defaults are for the Docker Compose configuration
"VLLM_URL": (
"http://host.docker.internal:${VLLM_PORT:-5100}/v1",
"URL of the vLLM server with the main inference model",
),
"SAFETY_VLLM_URL": (
"http://host.docker.internal:${SAFETY_VLLM_PORT:-5101}/v1",
"URL of the vLLM server with the safety model",
),
"MAX_TOKENS": (
"${MAX_TOKENS:-4096}",
"Maximum number of tokens for generation",
),
"INFERENCE_MODEL": (
"${INFERENCE_MODEL:-Llama3.2-3B-Instruct}",
"Name of the inference model to use",
),
"SAFETY_MODEL": (
"${SAFETY_MODEL:-Llama-Guard-3-1B}",
"Name of the safety (Llama-Guard) model to use",
),
"LLAMASTACK_PORT": (
"${LLAMASTACK_PORT:-5001}",
"Port for the Llama Stack distribution server",
),
},
)