diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md
index 4db76a3bf..ce3f8aecc 100644
--- a/docs/source/distributions/self_hosted_distro/groq.md
+++ b/docs/source/distributions/self_hosted_distro/groq.md
@@ -37,11 +37,11 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `groq/llama3-8b-8192 `
+- `groq/llama3-8b-8192 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
 - `groq/llama-3.1-8b-instant `
-- `groq/llama3-70b-8192 `
-- `groq/llama-3.3-70b-versatile `
-- `groq/llama-3.2-3b-preview `
+- `groq/llama3-70b-8192 (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
 
 
 ### Prerequisite: API Keys
diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py
index 4364edffa..08b9b4dc4 100644
--- a/llama_stack/providers/remote/inference/groq/models.py
+++ b/llama_stack/providers/remote/inference/groq/models.py
@@ -5,10 +5,13 @@
 # the root directory of this source tree.
 
 from llama_stack.models.llama.sku_list import CoreModelId
-from llama_stack.providers.utils.inference.model_registry import build_model_entry
+from llama_stack.providers.utils.inference.model_registry import (
+    build_hf_repo_model_entry,
+    build_model_entry,
+)
 
 MODEL_ENTRIES = [
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-8b-8192",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
@@ -16,11 +19,11 @@ MODEL_ENTRIES = [
         "groq/llama-3.1-8b-instant",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-70b-8192",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.3-70b-versatile",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
@@ -28,7 +31,7 @@ MODEL_ENTRIES = [
     # Preview models aren't recommended for production use, but we include this one
     # to pass the test fixture
     # TODO(aidand): Replace this with a stable model once Groq supports it
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.2-3b-preview",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml
index 1a08082bd..f1d72d572 100644
--- a/llama_stack/templates/dev/run.yaml
+++ b/llama_stack/templates/dev/run.yaml
@@ -301,6 +301,11 @@ models:
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-8B-Instruct
+  provider_id: groq
+  provider_model_id: groq/llama3-8b-8192
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-3.1-8b-instant
   provider_id: groq
@@ -311,16 +316,31 @@ models:
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3-70B-Instruct
+  provider_id: groq
+  provider_model_id: groq/llama3-70b-8192
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-3.3-70b-versatile
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-3.2-3b-preview
   provider_id: groq
   provider_model_id: groq/llama-3.2-3b-preview
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct
+  provider_id: groq
+  provider_model_id: groq/llama-3.2-3b-preview
+  model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml
index 8243d78c6..78212c8d9 100644
--- a/llama_stack/templates/groq/run.yaml
+++ b/llama_stack/templates/groq/run.yaml
@@ -95,6 +95,11 @@ models:
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-8B-Instruct
+  provider_id: groq
+  provider_model_id: groq/llama3-8b-8192
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-3.1-8b-instant
   provider_id: groq
@@ -105,16 +110,31 @@ models:
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3-70B-Instruct
+  provider_id: groq
+  provider_model_id: groq/llama3-70b-8192
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-3.3-70b-versatile
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-3.2-3b-preview
   provider_id: groq
   provider_model_id: groq/llama-3.2-3b-preview
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct
+  provider_id: groq
+  provider_model_id: groq/llama-3.2-3b-preview
+  model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2