all distros

Xi Yan 2024-12-03 20:49:30 -08:00
parent a097bfa761
commit 7103892f54
16 changed files with 79 additions and 62 deletions
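
The pattern applied in every template below is the same: bind the distro name to a variable, declare an explicit inline faiss memory provider rooted under distributions/<name>, and pin it as the only memory provider through provider_overrides; the generated run.yaml files correspondingly drop their chromadb and pgvector memory entries. The sketch below consolidates that repeated pattern for readability only; "my-distro" is a placeholder, not one of the templates in this commit.

# Illustrative consolidation of the pattern applied in each template below;
# "my-distro" is a placeholder name, not one of the templates in this commit.
from llama_stack.distribution.datatypes import Provider
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import RunConfigSettings

name = "my-distro"  # each real template binds its own name (bedrock, tgi, ...)

# Explicit inline faiss memory provider, with its kvstore rooted under the
# distro directory, exactly as declared in the templates below.
memory_provider = Provider(
    provider_id="faiss",
    provider_type="inline::faiss",
    config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)

# The template then pins this provider as the only "memory" provider in the
# generated run.yaml via provider_overrides.
run_configs = {
    "run.yaml": RunConfigSettings(
        provider_overrides={
            "memory": [memory_provider],
        },
    ),
}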

@@ -6,6 +6,9 @@
 from pathlib import Path
 
+from llama_stack.distribution.datatypes import Provider
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -20,9 +23,15 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
+    name = "bedrock"
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     return DistributionTemplate(
-        name="bedrock",
+        name=name,
         distro_type="self_hosted",
         description="Use AWS Bedrock for running LLM inference and safety",
         docker_image=None,
@@ -30,7 +39,11 @@ def get_distribution_template() -> DistributionTemplate:
         providers=providers,
         default_models=[],
         run_configs={
-            "run.yaml": RunConfigSettings(),
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "memory": [memory_provider],
+                },
+            ),
         },
         run_config_env_vars={
             "LLAMASTACK_PORT": (

@@ -24,12 +24,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: bedrock
     provider_type: remote::bedrock
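
For orientation: the faiss entry retained above is the one generated from FaissImplConfig.sample_run_config(f"distributions/{name}") in the template change. Reconstructed from the YAML lines kept in this hunk (not from the provider's own schema), the generated memory provider is roughly the following:

# Rough shape of the remaining faiss memory provider, reconstructed from the
# retained run.yaml lines above; this mirrors the YAML and is not copied from
# FaissImplConfig itself.
faiss_memory_provider = {
    "provider_id": "faiss",
    "provider_type": "inline::faiss",
    "config": {
        "kvstore": {
            "type": "sqlite",
            "namespace": None,
            "db_path": "${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db",
        },
    },
}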

@@ -9,6 +9,7 @@ from pathlib import Path
 from llama_models.sku_list import all_registered_models
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
@@ -27,11 +28,18 @@ def get_distribution_template() -> DistributionTemplate:
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
+    name = "fireworks"
     inference_provider = Provider(
         provider_id="fireworks",
         provider_type="remote::fireworks",
         config=FireworksImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     core_model_to_hf_repo = {
         m.descriptor(): m.huggingface_repo for m in all_registered_models()
@@ -45,7 +53,7 @@ def get_distribution_template() -> DistributionTemplate:
     ]
 
     return DistributionTemplate(
-        name="fireworks",
+        name=name,
         distro_type="self_hosted",
         description="Use Fireworks.AI for running LLM inference",
         docker_image=None,
@@ -56,6 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=default_models,
                 default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],

@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -20,12 +21,17 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "hf-endpoint"
     inference_provider = Provider(
         provider_id="hf-endpoint",
         provider_type="remote::hf::endpoint",
         config=InferenceEndpointImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -37,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="hf-endpoint",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
         docker_image=None,
@@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),

@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -21,11 +22,17 @@ def get_distribution_template() -> DistributionTemplate:
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
+    name = "hf-serverless"
     inference_provider = Provider(
         provider_id="hf-serverless",
         provider_type="remote::hf::serverless",
         config=InferenceAPIImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -37,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="hf-serverless",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
         docker_image=None,
@@ -48,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),

@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceInferenceConfig,
 )
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -24,7 +25,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "meta-reference-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
         provider_type="inline::meta-reference",
@@ -33,6 +34,11 @@ def get_distribution_template() -> DistributionTemplate:
             checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -44,7 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="meta-reference-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use Meta Reference for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -54,6 +60,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
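
Several config strings in these templates use ${env.NAME} and ${env.NAME:default} placeholders, for example ${env.INFERENCE_CHECKPOINT_DIR:null} above and the ${env.SQLITE_STORE_DIR:...} paths in the run.yaml files. As a minimal sketch of that placeholder syntax only, and not llama_stack's actual substitution code, such a value could be resolved like this:

# Minimal sketch of resolving a "${env.NAME:default}" placeholder.
# This is NOT llama_stack's implementation; it only illustrates the syntax
# used in the config strings above.
import os
import re

_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")

def resolve_env_placeholders(value: str) -> str:
    def _sub(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        # Fall back to the default (after the colon) when the variable is unset.
        return os.environ.get(name, default if default is not None else "")
    return _ENV_PATTERN.sub(_sub, value)

# Example: prints the default store dir when SQLITE_STORE_DIR is unset.
print(resolve_env_placeholders(
    "${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db"
))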

@@ -27,12 +27,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceQuantizedInferenceConfig,
 )
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -24,7 +25,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "meta-reference-quantized-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
         provider_type="inline::meta-reference-quantized",
@@ -33,13 +34,18 @@ def get_distribution_template() -> DistributionTemplate:
             checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
         provider_id="meta-reference-inference",
     )
 
     return DistributionTemplate(
-        name="meta-reference-quantized-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -49,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),

@@ -29,12 +29,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -24,12 +24,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -19,7 +20,7 @@ def get_distribution_template() -> DistributionTemplate:
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
     }
-
+    name = "remote-vllm"
     inference_provider = Provider(
         provider_id="vllm-inference",
         provider_type="remote::vllm",
@@ -27,6 +28,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.VLLM_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -38,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="remote-vllm",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) vLLM server for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),

@@ -25,12 +25,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import TGIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -22,7 +23,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "tgi"
     inference_provider = Provider(
         provider_id="tgi-inference",
         provider_type="remote::tgi",
@@ -30,6 +31,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.TGI_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -41,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="tgi",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) TGI server for running LLM inference",
         docker_image=None,
@@ -52,6 +58,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
            ),
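
As a quick manual check after regenerating the templates, one can load a generated run.yaml and confirm that its memory section now lists only the inline faiss provider. This is a hypothetical verification snippet: the run.yaml path is a placeholder, and it assumes only PyYAML plus the providers/memory layout visible in the hunks above.

# Hypothetical sanity check on a regenerated run.yaml (path is a placeholder;
# point it at the generated run.yaml in your checkout).
import yaml  # PyYAML

with open("run.yaml") as f:
    run_config = yaml.safe_load(f)

memory_providers = run_config["providers"]["memory"]
# After this change, faiss should be the only memory provider left.
assert [p["provider_type"] for p in memory_providers] == ["inline::faiss"]
print(memory_providers[0]["config"]["kvstore"]["db_path"])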