feat(starter)!: simplify starter distro; litellm model registry changes (#2916)

2025-07-27 06:28:50 +00:00 · 2025-07-25 15:02:04 -07:00 · 2025-07-25 15:02:04 -07:00 · 9583f468f8
commit 9583f468f8
parent 3344d8a9e5
64 changed files with 2027 additions and 4092 deletions
--- a/llama_stack/providers/remote/inference/groq/config.py
+++ b/llama_stack/providers/remote/inference/groq/config.py
@ -32,7 +32,7 @@ class GroqConfig(BaseModel):
    )

    @classmethod
-    def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY}", **kwargs) -> dict[str, Any]:
+    def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
        return {
            "url": "https://api.groq.com",
            "api_key": api_key,
--- a/llama_stack/providers/remote/inference/groq/groq.py
+++ b/llama_stack/providers/remote/inference/groq/groq.py
@ -34,6 +34,7 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
        LiteLLMOpenAIMixin.__init__(
            self,
            model_entries=MODEL_ENTRIES,
+            litellm_provider_name="groq",
            api_key_from_config=config.api_key,
            provider_data_api_key_field="groq_api_key",
        )
@ -96,7 +97,7 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
            tool_choice = "required"

        params = await prepare_openai_completion_params(
-            model=model_obj.provider_resource_id.replace("groq/", ""),
+            model=model_obj.provider_resource_id,
            messages=messages,
            frequency_penalty=frequency_penalty,
            function_call=function_call,
--- a/llama_stack/providers/remote/inference/groq/models.py
+++ b/llama_stack/providers/remote/inference/groq/models.py
@ -14,19 +14,19 @@ SAFETY_MODELS_ENTRIES = []

 MODEL_ENTRIES = [
    build_hf_repo_model_entry(
-        "groq/llama3-8b-8192",
+        "llama3-8b-8192",
        CoreModelId.llama3_1_8b_instruct.value,
    ),
    build_model_entry(
-        "groq/llama-3.1-8b-instant",
+        "llama-3.1-8b-instant",
        CoreModelId.llama3_1_8b_instruct.value,
    ),
    build_hf_repo_model_entry(
-        "groq/llama3-70b-8192",
+        "llama3-70b-8192",
        CoreModelId.llama3_70b_instruct.value,
    ),
    build_hf_repo_model_entry(
-        "groq/llama-3.3-70b-versatile",
+        "llama-3.3-70b-versatile",
        CoreModelId.llama3_3_70b_instruct.value,
    ),
    # Groq only contains a preview version for llama-3.2-3b
@ -34,23 +34,15 @@ MODEL_ENTRIES = [
    # to pass the test fixture
    # TODO(aidand): Replace this with a stable model once Groq supports it
    build_hf_repo_model_entry(
-        "groq/llama-3.2-3b-preview",
+        "llama-3.2-3b-preview",
        CoreModelId.llama3_2_3b_instruct.value,
    ),
    build_hf_repo_model_entry(
-        "groq/llama-4-scout-17b-16e-instruct",
+        "meta-llama/llama-4-scout-17b-16e-instruct",
        CoreModelId.llama4_scout_17b_16e_instruct.value,
    ),
    build_hf_repo_model_entry(
-        "groq/meta-llama/llama-4-scout-17b-16e-instruct",
-        CoreModelId.llama4_scout_17b_16e_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "groq/llama-4-maverick-17b-128e-instruct",
-        CoreModelId.llama4_maverick_17b_128e_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "groq/meta-llama/llama-4-maverick-17b-128e-instruct",
+        "meta-llama/llama-4-maverick-17b-128e-instruct",
        CoreModelId.llama4_maverick_17b_128e_instruct.value,
    ),
 ] + SAFETY_MODELS_ENTRIES