fold in meta-reference-quantized

This commit is contained in:
Ashwin Bharambe 2025-04-07 11:15:27 -07:00
parent cfaf9e0e8b
commit ff6c47d4e5
9 changed files with 24 additions and 439 deletions

View file

@@ -24,6 +24,8 @@ META_REFERENCE_DEPS = [
"zmq",
"lm-format-enforcer",
"sentence-transformers",
"torchao==0.5.0",
"fbgemm-gpu-genai==1.1.2",
]
@@ -36,13 +38,6 @@ def available_providers() -> List[ProviderSpec]:
module="llama_stack.providers.inline.inference.meta_reference",
config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
),
InlineProviderSpec(
api=Api.inference,
provider_type="inline::meta-reference-quantized",
pip_packages=META_REFERENCE_DEPS + ["fbgemm-gpu", "torchao==0.5.0"],
module="llama_stack.providers.inline.inference.meta_reference",
config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceQuantizedInferenceConfig",
),
InlineProviderSpec(
api=Api.inference,
provider_type="inline::vllm",