feat: Add Groq distribution template (#1173)

# What does this PR do?

Create a distribution template using Groq as the inference provider.
Link to issue: https://github.com/meta-llama/llama-stack/issues/958


## Test Plan
Run `python llama_stack/scripts/distro_codegen.py` to generate run.yaml
and build.yaml
Test the newly created template by running
`llama stack build --template <template-name>`
`llama stack run <template-name>`
This commit is contained in:
Vladislav Bronzov 2025-02-25 23:16:56 +01:00 committed by GitHub
parent 99c1d4c456
commit 967cff4533
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 521 additions and 36 deletions

View file

@@ -29,17 +29,10 @@ from llama_stack.apis.inference import (
ToolConfig,
)
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.models.llama.datatypes import (
SamplingParams,
ToolDefinition,
ToolPromptFormat,
)
from llama_stack.models.llama.sku_list import CoreModelId
from llama_stack.models.llama.datatypes import SamplingParams, ToolDefinition, ToolPromptFormat
from llama_stack.providers.remote.inference.groq.config import GroqConfig
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_hf_repo_model_entry,
build_model_entry,
)
from .groq_utils import (
@@ -47,33 +40,7 @@ from .groq_utils import (
convert_chat_completion_response,
convert_chat_completion_response_stream,
)
# Registry entries mapping Groq model identifiers to their core Llama model IDs.
_MODEL_ENTRIES = [
    build_hf_repo_model_entry("llama3-8b-8192", CoreModelId.llama3_1_8b_instruct.value),
    build_model_entry("llama-3.1-8b-instant", CoreModelId.llama3_1_8b_instruct.value),
    build_hf_repo_model_entry("llama3-70b-8192", CoreModelId.llama3_70b_instruct.value),
    build_hf_repo_model_entry("llama-3.3-70b-versatile", CoreModelId.llama3_3_70b_instruct.value),
    # Groq only offers llama-3.2-3b as a preview model. Preview models aren't
    # recommended for production use, but this one is included so the test
    # fixture passes.
    # TODO(aidand): Replace this with a stable model once Groq supports it
    build_hf_repo_model_entry("llama-3.2-3b-preview", CoreModelId.llama3_2_3b_instruct.value),
]
from .models import _MODEL_ENTRIES
class GroqInferenceAdapter(Inference, ModelRegistryHelper, NeedsRequestProviderData):