feat: add integration tests for post_training

Set inline::huggingface as the default post_training provider for the ollama distribution, and add integration tests covering the post_training API.

Signed-off-by: Charlie Doern <cdoern@redhat.com>
Charlie Doern 2025-05-13 17:21:30 -04:00
parent 7dcb997f17
commit ff246d890a
10 changed files with 161 additions and 53 deletions

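The integration tests themselves are not part of the excerpt below, so here is a minimal sketch of what a post_training integration test against this distribution might look like. The pytest fixture name llama_stack_client, the model id, the dataset id, and the exact field names are assumptions based on the post_training API surface, not a verbatim copy of the committed tests.

import uuid

def test_supervised_fine_tune(llama_stack_client):
    # Submit a deliberately tiny SFT job so the test finishes quickly.
    # All ids and hyperparameters here are illustrative.
    job_uuid = f"test-job-{uuid.uuid4()}"
    response = llama_stack_client.post_training.supervised_fine_tune(
        job_uuid=job_uuid,
        model="meta-llama/Llama-3.2-3B-Instruct",  # placeholder model id
        algorithm_config={
            "type": "LoRA",
            "lora_attn_modules": ["q_proj", "v_proj"],
            "apply_lora_to_mlp": True,
            "apply_lora_to_output": False,
            "rank": 8,
            "alpha": 16,
        },
        training_config={
            "n_epochs": 1,
            "max_steps_per_epoch": 1,
            "gradient_accumulation_steps": 1,
            "data_config": {
                "dataset_id": "test-dataset",  # assumed to be registered earlier in the test
                "batch_size": 1,
                "shuffle": False,
                "data_format": "instruct",
            },
        },
        hyperparam_search_config={},
        logger_config={},
    )
    assert response.job_uuid == job_uuid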

@@ -441,6 +441,7 @@
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"peft",
"pillow",
"psycopg2-binary",
"pymongo",
@@ -451,9 +452,11 @@
"scikit-learn",
"scipy",
"sentencepiece",
"torch",
"tqdm",
"transformers",
"tree_sitter",
"trl",
"uvicorn"
],
"open-benchmark": [

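The peft, torch, and trl additions above track the pip dependencies that the new default provider pulls in. As a rough sketch, an inline provider advertises its packages through a registry entry along these lines (the exact spec for inline::huggingface may differ; the module and config_class values are assumptions):

from llama_stack.providers.datatypes import Api, InlineProviderSpec

# Illustrative registry entry; field values are assumptions, not copied
# from the actual inline::huggingface provider spec.
InlineProviderSpec(
    api=Api.post_training,
    provider_type="inline::huggingface",
    pip_packages=["torch", "trl", "transformers", "peft", "datasets"],
    module="llama_stack.providers.inline.post_training.huggingface",
    config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
)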

@@ -23,6 +23,8 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
post_training:
- inline::huggingface
tool_runtime:
- remote::brave-search
- remote::tavily-search


@@ -13,6 +13,7 @@ from llama_stack.distribution.datatypes import (
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.post_training.huggingface import HuggingFacePostTrainingConfig
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -28,6 +29,7 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"post_training": ["inline::huggingface"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
@@ -47,7 +49,11 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
posttraining_provider = Provider(
provider_id="huggingface",
provider_type="inline::huggingface",
config=HuggingFacePostTrainingConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
provider_id="ollama",
@@ -92,6 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": [inference_provider],
"vector_io": [vector_io_provider_faiss],
"post_training": [posttraining_provider],
},
default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups,
@@ -100,6 +107,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": [inference_provider],
"vector_io": [vector_io_provider_faiss],
"post_training": [posttraining_provider],
"safety": [
Provider(
provider_id="llama-guard",

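sample_run_config supplies the provider defaults that get rendered into the generated run.yaml files below. Judging from that output, it plausibly returns something like the following (a sketch inferred from the YAML, not the actual implementation):

def sample_run_config(__distro_dir__: str) -> dict:
    # Inferred from the generated run.yaml below; the real classmethod on
    # HuggingFacePostTrainingConfig may accept more arguments or emit
    # more fields.
    return {
        "checkpoint_format": "huggingface",
        "distributed_backend": None,
        "device": "cpu",
    }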

@@ -5,6 +5,7 @@ apis:
- datasetio
- eval
- inference
- post_training
- safety
- scoring
- telemetry
@@ -80,6 +81,13 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
post_training:
- provider_id: huggingface
provider_type: inline::huggingface
config:
checkpoint_format: huggingface
distributed_backend: null
device: cpu
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search

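The checkpoint_format/distributed_backend/device block above is the serialized provider config. A rough pydantic sketch of the schema it implies (the field names come straight from the YAML; the Literal choices and defaults are assumptions):

from typing import Literal, Optional
from pydantic import BaseModel

class HuggingFacePostTrainingConfig(BaseModel):
    # Field names mirror the run.yaml above; allowed values and defaults
    # are assumptions, not the provider's actual definition.
    checkpoint_format: Literal["huggingface", "full_state"] = "huggingface"
    distributed_backend: Optional[str] = None
    device: str = "cpu"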

@@ -5,6 +5,7 @@ apis:
- datasetio
- eval
- inference
- post_training
- safety
- scoring
- telemetry
@@ -78,6 +79,13 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
post_training:
- provider_id: huggingface
provider_type: inline::huggingface
config:
checkpoint_format: huggingface
distributed_backend: null
device: cpu
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
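
With either run configuration started, a client can submit and then poll training jobs. A hedged usage sketch with the llama-stack-client Python SDK (the base URL, job id, and the job.status surface are assumptions; check the SDK version for exact method names):

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # port is illustrative

# Assumed client surface for job inspection.
status = client.post_training.job.status(job_uuid="test-job-1234")
print(status)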