diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index f6df8b1f3..3fcb16e12 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -96,5 +96,5 @@ If you are using Llama Stack Safety / Shield APIs, use:
 llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \
   --port 5001 \
   --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-Llama-Guard-3-1B
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
index 73db33026..23302a3ab 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
@@ -76,7 +76,7 @@ docker run \
   llamastack/distribution-meta-reference-quantized-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-Llama-Guard-3-1B
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```

 ### Via Conda
diff --git a/llama_stack/providers/inline/inference/meta_reference/__init__.py b/llama_stack/providers/inline/inference/meta_reference/__init__.py
index 2ae3e7a16..9c923490d 100644
--- a/llama_stack/providers/inline/inference/meta_reference/__init__.py
+++ b/llama_stack/providers/inline/inference/meta_reference/__init__.py
@@ -17,5 +17,4 @@ async def get_provider_impl(

     impl = MetaReferenceInferenceImpl(config)
     await impl.initialize()
-
     return impl