feat: add auto-generated CI documentation pre-commit hook (#2890)

Our CI is entirely undocumented; this commit adds a README.md file with
a table of the current CI workflows and what each one does.
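
As a rough illustration of what such a generator hook does, something along
these lines could rebuild the table on each commit. This is a minimal sketch,
not the actual hook shipped in this PR: the paths, the PyYAML dependency, and
the table columns are all assumptions.

```python
#!/usr/bin/env python3
"""Sketch of a CI-docs generator pre-commit hook (illustrative only)."""
from pathlib import Path

import yaml  # assumption: PyYAML is available in the hook environment

WORKFLOW_DIR = Path(".github/workflows")  # assumed location of the CI configs
README = WORKFLOW_DIR / "README.md"       # assumed output location


def generate_table() -> str:
    rows = ["| Name | File | Triggers |", "| --- | --- | --- |"]
    for wf in sorted(WORKFLOW_DIR.glob("*.y*ml")):
        spec = yaml.safe_load(wf.read_text())
        name = spec.get("name", wf.stem)
        # Note: a bare "on:" key parses as the boolean True under YAML 1.1
        # (PyYAML), so look it up under both keys.
        triggers = spec.get("on", spec.get(True, {}))
        names = ", ".join(triggers) if isinstance(triggers, (dict, list)) else str(triggers)
        rows.append(f"| {name} | `{wf.name}` | {names} |")
    return "\n".join(rows) + "\n"


if __name__ == "__main__":
    README.write_text("# CI\n\n" + generate_table())
```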

---------

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
Authored by Nathan Weinberg on 2025-07-25 11:57:01 -04:00; committed by Mustafa Elbehery
parent 7f834339ba
commit b381ed6d64
93 changed files with 495 additions and 477 deletions


@@ -73,11 +73,12 @@ from .config import MetaReferenceInferenceConfig
 from .generators import LlamaGenerator
 from .model_parallel import LlamaModelParallelGenerator
-log = get_logger(__name__, category="inference")
 # there's a single model parallel process running serving the model. for now,
 # we don't support multiple concurrent requests to this process.
 SEMAPHORE = asyncio.Semaphore(1)
+logger = get_logger(__name__, category="inference")
 
 def llama_builder_fn(config: MetaReferenceInferenceConfig, model_id: str, llama_model: Model) -> LlamaGenerator:
     return LlamaGenerator(config, model_id, llama_model)
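
The `SEMAPHORE = asyncio.Semaphore(1)` in the hunk above is what enforces the
single-request constraint described in the comment. A self-contained sketch of
the pattern, with an illustrative `generate` coroutine standing in for the
real call into the model-parallel process:

```python
import asyncio

# One permit: only a single caller may use the model process at a time.
SEMAPHORE = asyncio.Semaphore(1)


async def generate(prompt: str) -> str:
    """Hypothetical stand-in for a call into the model-parallel process."""
    async with SEMAPHORE:  # concurrent callers queue here; one proceeds
        await asyncio.sleep(0.1)  # simulate generation work
        return f"echo: {prompt}"


async def main() -> None:
    # Both requests start concurrently but execute strictly one at a time.
    print(await asyncio.gather(generate("a"), generate("b")))


asyncio.run(main())
```

Both requests complete, but the second only enters the critical section after
the first releases the semaphore.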
@@ -144,7 +145,7 @@ class MetaReferenceInferenceImpl(
         return model
 
     async def load_model(self, model_id, llama_model) -> None:
-        log.info(f"Loading model `{model_id}`")
+        logger.info(f"Loading model `{model_id}`")
         builder_params = [self.config, model_id, llama_model]
@@ -166,7 +167,7 @@ class MetaReferenceInferenceImpl(
         self.model_id = model_id
         self.llama_model = llama_model
 
-        log.info("Warming up...")
+        logger.info("Warming up...")
         await self.completion(
             model_id=model_id,
             content="Hello, world!",
@@ -177,7 +178,7 @@
             messages=[UserMessage(content="Hi how are you?")],
             sampling_params=SamplingParams(max_tokens=20),
         )
-        log.info("Warmed up!")
+        logger.info("Warmed up!")
 
     def check_model(self, request) -> None:
         if self.model_id is None or self.llama_model is None: