Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-01 16:24:44 +00:00)

Commit e4509cb568: more progress on auto-generation
Parent: cfa913fdd5

10 changed files with 309 additions and 73 deletions

@@ -33,7 +33,7 @@ providers:
       persistence_store:
         namespace: null
         type: sqlite
-        db_path: ~/.llama/runtime/kvstore.db
+        db_path: ${env.SQLITE_STORE_DIR:/home/ashwin/.llama/runtime}/kvstore.db
   telemetry:
   - provider_id: meta0
     provider_type: inline::meta-reference

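Note: values of the form ${env.NAME:default} are resolved against the environment, falling back to the literal default. A minimal sketch of that substitution in Python (illustrative only, not the stack's actual resolver):

    import os
    import re

    def substitute_env(value: str) -> str:
        # Replace ${env.NAME:default} with os.environ["NAME"], falling back to the default.
        def repl(match: re.Match) -> str:
            name, default = match.group(1), match.group(2)
            return os.environ.get(name, default)

        return re.sub(r"\$\{env\.([A-Za-z0-9_]+):([^}]*)\}", repl, value)

    print(substitute_env("${env.SQLITE_STORE_DIR:/home/ashwin/.llama/runtime}/kvstore.db"))
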
@@ -14,9 +14,9 @@ class MetaReferenceAgentsImplConfig(BaseModel):
     persistence_store: KVStoreConfig = Field(default=SqliteKVStoreConfig())

     @classmethod
-    def sample_dict(cls):
+    def sample_run_config(cls):
         return {
-            "persistence_store": SqliteKVStoreConfig.sample_dict(
+            "persistence_store": SqliteKVStoreConfig.sample_run_config(
                 db_name="agents_store.db"
             ),
         }

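The rename establishes the convention the template generator relies on: a provider config class that exposes a sample_run_config classmethod returns the plain dict that gets serialized into run.yaml. A self-contained sketch of that convention (hypothetical stand-in classes, not the real providers):

    class FakeSqliteKVStoreConfig:
        @classmethod
        def sample_run_config(cls, db_name: str = "kvstore.db"):
            # Mirrors the shape of the kvstore sample configs further down in this diff.
            return {"type": "sqlite", "namespace": None, "db_name": db_name}

    class FakeAgentsConfig:
        @classmethod
        def sample_run_config(cls):
            return {
                "persistence_store": FakeSqliteKVStoreConfig.sample_run_config(
                    db_name="agents_store.db"
                )
            }

    print(FakeAgentsConfig.sample_run_config())
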
@@ -35,7 +35,7 @@ class VLLMConfig(BaseModel):
     )

     @classmethod
-    def sample_dict(cls):
+    def sample_run_config(cls):
         return {
             "model": "${env.VLLM_INFERENCE_MODEL:Llama3.2-3B-Instruct}",
             "tensor_parallel_size": "${env.VLLM_TENSOR_PARALLEL_SIZE:1}",

@@ -11,9 +11,3 @@ from pydantic import BaseModel

 class LlamaGuardConfig(BaseModel):
     excluded_categories: List[str] = []
-
-    @classmethod
-    def sample_dict(cls):
-        return {
-            "excluded_categories": [],
-        }

@@ -4,11 +4,34 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from typing import Optional
+
 from llama_stack.distribution.datatypes import RemoteProviderConfig
+from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig
+
+
+DEFAULT_OLLAMA_PORT = 11434


 class OllamaImplConfig(RemoteProviderConfig):
-    port: int = 11434
+    port: int = DEFAULT_OLLAMA_PORT
+
+    @classmethod
+    def sample_docker_compose_config(cls) -> Optional[DockerComposeServiceConfig]:
+        return DockerComposeServiceConfig(
+            image="ollama/ollama:latest",
+            volumes=["$HOME/.ollama:/root/.ollama"],
+            devices=["nvidia.com/gpu=all"],
+            deploy={
+                "resources": {
+                    "reservations": {
+                        "devices": [{"driver": "nvidia", "capabilities": ["gpu"]}]
+                    }
+                }
+            },
+            runtime="nvidia",
+            ports=[f"{DEFAULT_OLLAMA_PORT}:{DEFAULT_OLLAMA_PORT}"],
+        )


 async def get_adapter_impl(config: RemoteProviderConfig, _deps):

@@ -9,6 +9,11 @@ from typing import Optional
 from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field

+from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig
+
+
+DEFAULT_VLLM_PORT = 8000
+

 @json_schema_type
 class VLLMInferenceAdapterConfig(BaseModel):

@@ -26,10 +31,50 @@ class VLLMInferenceAdapterConfig(BaseModel):
     )

     @classmethod
-    def sample_dict(cls):
-        # TODO: we may need two modes, one for conda and one for docker
+    def sample_run_config(
+        cls,
+        url: str = "${env.VLLM_URL:http://host.docker.internal:5100/v1}",
+    ):
         return {
-            "url": "${env.VLLM_URL:http://host.docker.internal:5100/v1}",
+            "url": url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
             "api_token": "${env.VLLM_API_TOKEN:fake}",
         }
+
+    @classmethod
+    def sample_docker_compose_config(
+        cls,
+        port: int = DEFAULT_VLLM_PORT,
+        cuda_visible_devices: str = "0",
+        model: str = "meta-llama/Llama-3.2-3B-Instruct",
+    ) -> Optional[DockerComposeServiceConfig]:
+        return DockerComposeServiceConfig(
+            image="vllm/vllm-openai:latest",
+            volumes=["$HOME/.cache/huggingface:/root/.cache/huggingface"],
+            devices=["nvidia.com/gpu=all"],
+            deploy={
+                "resources": {
+                    "reservations": {
+                        "devices": [{"driver": "nvidia", "capabilities": ["gpu"]}]
+                    }
+                }
+            },
+            runtime="nvidia",
+            ports=[f"{port}:{port}"],
+            environment={
+                "CUDA_VISIBLE_DEVICES": cuda_visible_devices,
+                "HUGGING_FACE_HUB_TOKEN": "$HF_TOKEN",
+            },
+            command=(
+                " ".join(
+                    [
+                        "--gpu-memory-utilization 0.75",
+                        f"--model {model}",
+                        "--enforce-eager",
+                        "--max-model-len 8192",
+                        "--max-num-seqs 16",
+                        f"--port {port}",
+                    ]
+                )
+            ),
+        )

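A usage sketch of the two hooks on the remote vLLM adapter config (parameter values here are illustrative; the defaults come from the signatures above):

    from llama_stack.providers.remote.inference.vllm.config import (
        VLLMInferenceAdapterConfig,
    )

    # run.yaml side: only the URL is overridden, other fields keep their ${env...} defaults
    run_cfg = VLLMInferenceAdapterConfig.sample_run_config(
        url="${env.SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}",
    )

    # compose.yaml side: one vLLM container pinned to a GPU and a host port
    compose_cfg = VLLMInferenceAdapterConfig.sample_docker_compose_config(
        port=5101,
        cuda_visible_devices="1",
        model="meta-llama/Llama-Guard-3-1B",
    )
    print(run_cfg["url"], compose_cfg.ports)
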
llama_stack/providers/utils/docker/__init__.py (new file, 5 lines)

@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

llama_stack/providers/utils/docker/service_config.py (new file, 29 lines)

@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel
+
+
+class LiteralString(str):
+    pass  # Marker class for strings we want to format with >
+
+
+class DockerComposeServiceConfig(BaseModel):
+    """Configuration for a single service in docker-compose."""
+
+    image: str
+    volumes: Optional[List[str]] = None
+    network_mode: str = "bridge"
+    ports: Optional[List[str]] = None
+    devices: Optional[List[str]] = None
+    environment: Optional[Dict[str, str]] = None
+    command: Optional[str] = None
+    depends_on: Optional[List[str]] = None
+    deploy: Optional[Dict[str, Any]] = None
+    runtime: Optional[str] = None
+    entrypoint: Optional[str] = None

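Each provider contributes one of these service entries; its model_dump() output is what lands under services: in the generated compose.yaml. A small sketch (the service name and field values are illustrative, borrowed from the Ollama config above):

    import yaml

    from llama_stack.providers.utils.docker.service_config import (
        DockerComposeServiceConfig,
    )

    service = DockerComposeServiceConfig(
        image="ollama/ollama:latest",
        volumes=["$HOME/.ollama:/root/.ollama"],
        ports=["11434:11434"],
    )
    # model_dump() yields the plain dict that goes under `services:` in compose.yaml
    print(yaml.safe_dump({"services": {"ollama": service.model_dump()}}, sort_keys=False))
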
@@ -37,7 +37,7 @@ class RedisKVStoreConfig(CommonConfig):
         return f"redis://{self.host}:{self.port}"

     @classmethod
-    def sample_dict(cls):
+    def sample_run_config(cls):
         return {
             "type": "redis",
             "namespace": None,

@@ -54,7 +54,7 @@ class SqliteKVStoreConfig(CommonConfig):
     )

     @classmethod
-    def sample_dict(cls, db_name: str = "kvstore.db"):
+    def sample_run_config(cls, db_name: str = "kvstore.db"):
         return {
             "type": "sqlite",
             "namespace": None,

@@ -72,7 +72,7 @@ class PostgresKVStoreConfig(CommonConfig):
     table_name: str = "llamastack_kvstore"

     @classmethod
-    def sample_dict(cls, table_name: str = "llamastack_kvstore"):
+    def sample_run_config(cls, table_name: str = "llamastack_kvstore"):
         return {
             "type": "postgres",
             "namespace": None,

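The kvstore hooks keep their keyword arguments across the rename, so callers such as the agents config above can keep passing db_name. A quick check (the import path is an assumption here, not shown in this diff):

    # Import path assumed; adjust to wherever the KVStore configs live in this tree.
    from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig

    print(SqliteKVStoreConfig.sample_run_config(db_name="agents_store.db"))
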
@@ -4,19 +4,22 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from datetime import datetime
+
 from io import StringIO

 from pathlib import Path
-from typing import Dict, List, Optional, Set
+from typing import Any, Dict, List, Optional, Set, Tuple

 import jinja2
 import yaml
-from pydantic import BaseModel
+from pydantic import BaseModel, Field

 from rich.console import Console
 from rich.table import Table

 from llama_stack.distribution.datatypes import (
+    Api,
     BuildConfig,
     DistributionSpec,
     KVStoreConfig,

@@ -25,6 +28,12 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     StackRunConfig,
 )
+from llama_stack.distribution.distribution import get_provider_registry
+from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.providers.remote.inference.vllm.config import (
+    VLLMInferenceAdapterConfig,
+)
+from llama_stack.providers.utils.docker.service_config import DockerComposeServiceConfig


 class DistributionTemplate(BaseModel):

@@ -36,12 +45,17 @@ class DistributionTemplate(BaseModel):
     name: str
     description: str
     providers: Dict[str, List[str]]
+    run_config_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
+    compose_config_overrides: Dict[str, Dict[str, DockerComposeServiceConfig]] = Field(
+        default_factory=dict
+    )
+
     default_models: List[ModelInput]
     default_shields: Optional[List[ShieldInput]] = None

     # Optional configuration
     metadata_store: Optional[KVStoreConfig] = None
-    env_vars: Optional[Dict[str, str]] = None
+    docker_compose_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
     docker_image: Optional[str] = None

     @property

@@ -59,8 +73,42 @@ class DistributionTemplate(BaseModel):
             image_type="conda",  # default to conda, can be overridden
         )

-    def run_config(self, provider_configs: Dict[str, List[Provider]]) -> StackRunConfig:
-        from datetime import datetime
+    def run_config(self) -> StackRunConfig:
+        provider_registry = get_provider_registry()
+
+        provider_configs = {}
+        for api_str, provider_types in self.providers.items():
+            if providers := self.run_config_overrides.get(api_str):
+                provider_configs[api_str] = providers
+                continue
+
+            provider_type = provider_types[0]
+            provider_id = provider_type.split("::")[-1]
+
+            api = Api(api_str)
+            if provider_type not in provider_registry[api]:
+                raise ValueError(
+                    f"Unknown provider type: {provider_type} for API: {api_str}"
+                )
+
+            config_class = provider_registry[api][provider_type].config_class
+            assert (
+                config_class is not None
+            ), f"No config class for provider type: {provider_type} for API: {api_str}"
+
+            config_class = instantiate_class_type(config_class)
+            if hasattr(config_class, "sample_run_config"):
+                config = config_class.sample_run_config()
+            else:
+                config = {}
+
+            provider_configs[api_str] = [
+                Provider(
+                    provider_id=provider_id,
+                    provider_type=provider_type,
+                    config=config,
+                )
+            ]

         # Get unique set of APIs from providers
         apis: Set[str] = set(self.providers.keys())

@@ -76,6 +124,70 @@ class DistributionTemplate(BaseModel):
             shields=self.default_shields or [],
         )

+    def docker_compose_config(self) -> Dict[str, Any]:
+        services = {}
+        provider_registry = get_provider_registry()
+
+        # Add provider services based on their sample_compose_config
+        for api_str, api_providers in self.providers.items():
+            if overrides := self.compose_config_overrides.get(api_str):
+                services |= overrides
+                continue
+
+            # only look at the first provider to get the compose config for now
+            # we may want to use `docker compose profiles` in the future
+            provider_type = api_providers[0]
+            provider_id = provider_type.split("::")[-1]
+            api = Api(api_str)
+            if provider_type not in provider_registry[api]:
+                raise ValueError(
+                    f"Unknown provider type: {provider_type} for API: {api_str}"
+                )
+
+            config_class = provider_registry[api][provider_type].config_class
+            assert (
+                config_class is not None
+            ), f"No config class for provider type: {provider_type} for API: {api_str}"
+
+            config_class = instantiate_class_type(config_class)
+            if not hasattr(config_class, "sample_docker_compose_config"):
+                continue
+
+            compose_config = config_class.sample_docker_compose_config()
+            services[provider_id] = compose_config
+
+        port = "${LLAMASTACK_PORT:-5001}"
+        # Add main llamastack service
+        llamastack_config = DockerComposeServiceConfig(
+            image=f"llamastack/distribution-{self.name}:latest",
+            depends_on=list(services.keys()),
+            volumes=[
+                "~/.llama:/root/.llama",
+                f"~/local/llama-stack/distributions/{self.name}/run.yaml:/root/llamastack-run-{self.name}.yaml",
+            ],
+            ports=[f"{port}:{port}"],
+            environment={
+                k: v[0] for k, v in (self.docker_compose_env_vars or {}).items()
+            },
+            entrypoint=(
+                f'bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-{self.name}.yaml --port {port}"'
+            ),
+            deploy={
+                "restart_policy": {
+                    "condition": "on-failure",
+                    "delay": "3s",
+                    "max_attempts": 5,
+                    "window": "60s",
+                }
+            },
+        )
+
+        services["llamastack"] = llamastack_config
+        return {
+            "services": {k: v.model_dump() for k, v in services.items()},
+            "volumes": {service_name: None for service_name in services.keys()},
+        }
+
     def generate_markdown_docs(self) -> str:
         """Generate markdown documentation using both Jinja2 templates and rich tables."""
         # First generate the providers table using rich

@@ -108,7 +220,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following

 The following environment variables can be configured:

-{% for var, description in env_vars.items() %}
+{% for var, (value, description) in docker_compose_env_vars.items() %}
 - `{{ var }}`: {{ description }}
 {% endfor %}
 {%- endif %}

@@ -122,29 +234,6 @@ $ cd distributions/{{ name }}
 $ docker compose up
 ```

-### Manual Configuration
-
-You can also configure the distribution manually by creating a `run.yaml` file:
-
-```yaml
-version: '2'
-image_name: {{ name }}
-apis:
-{% for api in providers.keys() %}
-  - {{ api }}
-{% endfor %}
-
-providers:
-{% for api, provider_list in providers.items() %}
-  {{ api }}:
-{% for provider in provider_list %}
-  - provider_id: {{ provider.lower() }}-0
-    provider_type: {{ provider }}
-    config: {}
-{% endfor %}
-{% endfor %}
-```
-
 ## Models

 The following models are configured by default:

@@ -170,7 +259,7 @@ The following safety shields are configured:
             description=self.description,
             providers=self.providers,
             providers_table=providers_table,
-            env_vars=self.env_vars,
+            docker_compose_env_vars=self.docker_compose_env_vars,
             default_models=self.default_models,
             default_shields=self.default_shields,
         )

@@ -178,29 +267,24 @@ The following safety shields are configured:
     def save_distribution(self, output_dir: Path) -> None:
         output_dir.mkdir(parents=True, exist_ok=True)

-        # Save build.yaml
         build_config = self.build_config()
         with open(output_dir / "build.yaml", "w") as f:
             yaml.safe_dump(build_config.model_dump(), f, sort_keys=False)

-        # Save run.yaml template
-        # Create a minimal provider config for the template
-        provider_configs = {
-            api: [
-                Provider(
-                    provider_id=f"{provider.lower()}-0",
-                    provider_type=provider,
-                    config={},
-                )
-                for provider in providers
-            ]
-            for api, providers in self.providers.items()
-        }
-        run_config = self.run_config(provider_configs)
+        run_config = self.run_config()
+        serialized = run_config.model_dump()
         with open(output_dir / "run.yaml", "w") as f:
-            yaml.safe_dump(run_config.model_dump(), f, sort_keys=False)
+            yaml.safe_dump(serialized, f, sort_keys=False)
+
+        # serialized_str = yaml.dump(serialized, sort_keys=False)
+        # env_vars = set()
+        # for match in re.finditer(r"\${env\.([A-Za-z0-9_-]+)}", serialized_str):
+        #     env_vars.add(match.group(1))
+
+        docker_compose = self.docker_compose_config()
+        with open(output_dir / "compose.yaml", "w") as f:
+            yaml.safe_dump(docker_compose, f, sort_keys=False, default_flow_style=False)

-        # Save documentation
         docs = self.generate_markdown_docs()
         with open(output_dir / f"{self.name}.md", "w") as f:
             f.write(docs)

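Putting it together, save_distribution now emits the run config, the compose file, and the docs from a single template. A hypothetical driver (the import path and field values are assumptions for illustration, not part of this diff):

    from pathlib import Path

    # Import path assumed; DistributionTemplate is the class changed in this file.
    from llama_stack.templates.template import DistributionTemplate

    template = DistributionTemplate(
        name="demo",
        description="illustrative distribution",
        providers={"agents": ["inline::meta-reference"]},
        default_models=[],
    )
    # Writes build.yaml, run.yaml, compose.yaml, and demo.md into the target directory.
    template.save_distribution(Path("distributions") / "demo")
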
@@ -217,21 +301,77 @@ The following safety shields are configured:
             "agents": ["inline::meta-reference"],
             "telemetry": ["inline::meta-reference"],
         },
+        run_config_overrides={
+            "inference": [
+                Provider(
+                    provider_id="vllm-0",
+                    provider_type="remote::vllm",
+                    config=VLLMInferenceAdapterConfig.sample_run_config(
+                        url="${env.VLLM_URL:http://host.docker.internal:5100/v1}",
+                    ),
+                ),
+                Provider(
+                    provider_id="vllm-1",
+                    provider_type="remote::vllm",
+                    config=VLLMInferenceAdapterConfig.sample_run_config(
+                        url="${env.SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}",
+                    ),
+                ),
+            ]
+        },
+        compose_config_overrides={
+            "inference": {
+                "vllm-0": VLLMInferenceAdapterConfig.sample_docker_compose_config(
+                    port=5100,
+                    cuda_visible_devices="0",
+                    model="${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}",
+                ),
+                "vllm-1": VLLMInferenceAdapterConfig.sample_docker_compose_config(
+                    port=5100,
+                    cuda_visible_devices="1",
+                    model="${env.SAFETY_MODEL:Llama-Guard-3-1B}",
+                ),
+            }
+        },
         default_models=[
             ModelInput(
-                model_id="${env.LLAMA_INFERENCE_MODEL:Llama3.1-8B-Instruct}"
+                model_id="${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}",
+                provider_id="vllm-0",
+            ),
+            ModelInput(
+                model_id="${env.SAFETY_MODEL:Llama-Guard-3-1B}",
+                provider_id="vllm-1",
             ),
-            ModelInput(model_id="${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}"),
         ],
         default_shields=[
-            ShieldInput(shield_id="${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}")
+            ShieldInput(shield_id="${env.SAFETY_MODEL:Llama-Guard-3-1B}")
         ],
-        env_vars={
-            "LLAMA_INFERENCE_VLLM_URL": "URL of the vLLM inference server",
-            "LLAMA_SAFETY_VLLM_URL": "URL of the vLLM safety server",
-            "MAX_TOKENS": "Maximum number of tokens for generation",
-            "LLAMA_INFERENCE_MODEL": "Name of the inference model to use",
-            "LLAMA_SAFETY_MODEL": "Name of the safety model to use",
-        },
+        docker_compose_env_vars={
+            # these defaults are for the Docker Compose configuration
+            "VLLM_URL": (
+                "http://host.docker.internal:${VLLM_PORT:-5100}/v1",
+                "URL of the vLLM server with the main inference model",
+            ),
+            "SAFETY_VLLM_URL": (
+                "http://host.docker.internal:${SAFETY_VLLM_PORT:-5101}/v1",
+                "URL of the vLLM server with the safety model",
+            ),
+            "MAX_TOKENS": (
+                "${MAX_TOKENS:-4096}",
+                "Maximum number of tokens for generation",
+            ),
+            "INFERENCE_MODEL": (
+                "${INFERENCE_MODEL:-Llama3.2-3B-Instruct}",
+                "Name of the inference model to use",
+            ),
+            "SAFETY_MODEL": (
+                "${SAFETY_MODEL:-Llama-Guard-3-1B}",
+                "Name of the safety (Llama-Guard) model to use",
+            ),
+            "LLAMASTACK_PORT": (
+                "${LLAMASTACK_PORT:-5001}",
+                "Port for the Llama Stack distribution server",
+            ),
+        },
     )