Mirror of https://github.com/meta-llama/llama-stack.git
Synced 2025-12-24 16:00:01 +00:00
feat: add auto-generated CI documentation pre-commit hook (#2890)
Our CI is entirely undocumented; this commit adds a README.md file with a table of the current CI workflows and what each one does.

---------

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
This commit is contained in:
parent 7f834339ba
commit b381ed6d64
93 changed files with 495 additions and 477 deletions
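The generator script and the hook wiring are not visible in the hunks below, so the following is a rough illustration only: every path, name, and the table format are assumptions rather than the actual implementation. A pre-commit hook of this kind typically runs a small script that scans `.github/workflows/` and rewrites a Markdown table:

```python
# Hypothetical sketch of a CI-docs generator that a pre-commit hook could run.
# Paths, the README location, and the table layout are assumptions, not this commit's script.
import re
from pathlib import Path

WORKFLOWS_DIR = Path(".github/workflows")
README_PATH = WORKFLOWS_DIR / "README.md"


def workflow_name(path: Path) -> str:
    """Pull the top-level 'name:' field from a workflow file, falling back to the filename."""
    match = re.search(r"^name:\s*(.+)$", path.read_text(), flags=re.MULTILINE)
    return match.group(1).strip().strip("\"'") if match else path.stem


def main() -> None:
    rows = ["| Workflow | File |", "|---|---|"]
    for wf in sorted(WORKFLOWS_DIR.glob("*.y*ml")):  # matches .yml and .yaml
        rows.append(f"| {workflow_name(wf)} | `{wf.name}` |")
    README_PATH.write_text("# Llama Stack CI\n\n" + "\n".join(rows) + "\n")


if __name__ == "__main__":
    main()
```

Wired in as a local pre-commit hook, a script like this regenerates the table on every commit, so the README cannot silently drift from the workflow files.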
@@ -13,7 +13,7 @@
 # Copyright (c) Meta Platforms, Inc. and its affiliates.
 import math
-from logging import getLogger
+from logging import getLogger # allow-direct-logging

 import torch
 import torch.nn.functional as F
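Note the `# allow-direct-logging` marker added to the stdlib imports that are kept: it reads like an escape hatch for an automated check that would otherwise flag direct `logging` imports. The exact enforcement mechanism is not shown in this diff; the checker below is a hypothetical sketch of such a hook, not the project's actual script:

```python
# Hypothetical checker honoring the "# allow-direct-logging" escape hatch seen in this diff.
import re
import sys
from pathlib import Path

DIRECT_LOGGING = re.compile(r"^\s*(import logging\b|from logging import )")
ALLOW_MARKER = "# allow-direct-logging"


def main(paths: list[str]) -> int:
    bad = []
    for name in paths:
        for lineno, line in enumerate(Path(name).read_text().splitlines(), start=1):
            # Flag direct stdlib logging imports unless the line opts out explicitly.
            if DIRECT_LOGGING.search(line) and ALLOW_MARKER not in line:
                bad.append(f"{name}:{lineno}: use llama_stack.log.get_logger instead")
    print("\n".join(bad))
    return 1 if bad else 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
```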
@@ -13,7 +13,7 @@
 import math
 from collections import defaultdict
-from logging import getLogger
+from logging import getLogger # allow-direct-logging
 from typing import Any

 import torch
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import logging
 import math
 from collections.abc import Callable
 from functools import partial

@@ -22,6 +21,8 @@ from PIL import Image as PIL_Image
 from torch import Tensor, nn
 from torch.distributed import _functional_collectives as funcol

+from llama_stack.log import get_logger
+
 from ..model import ModelArgs, RMSNorm, apply_rotary_emb, precompute_freqs_cis
 from .encoder_utils import (
     build_encoder_attention_mask,

@@ -34,9 +35,10 @@ from .encoder_utils import (
 from .image_transform import VariableSizeImageTransform
 from .utils import get_negative_inf_value, to_2tuple

-logger = logging.getLogger(__name__)
 MP_SCALE = 8

+log = get_logger(name=__name__, category="core")
+

 def reduce_from_tensor_model_parallel_region(input_):
     """All-reduce the input tensor across model parallel group."""

@@ -415,7 +417,7 @@ class VisionEncoder(nn.Module):
             )
             state_dict[prefix + "gated_positional_embedding"] = global_pos_embed
             state_dict[prefix + "gated_positional_embedding_gate"] = torch.zeros(1, dtype=global_pos_embed.dtype)
-            logger.info(f"Initialized global positional embedding with size {global_pos_embed.size()}")
+            log.info(f"Initialized global positional embedding with size {global_pos_embed.size()}")
         else:
             global_pos_embed = resize_global_position_embedding(
                 state_dict[prefix + "gated_positional_embedding"],

@@ -423,7 +425,7 @@ class VisionEncoder(nn.Module):
                 self.max_num_tiles,
                 self.max_num_tiles,
             )
-            logger.info(
+            log.info(
                 f"Resized global positional embedding from {state_dict[prefix + 'gated_positional_embedding'].size()} to {global_pos_embed.size()}"
             )
             state_dict[prefix + "gated_positional_embedding"] = global_pos_embed

@@ -771,7 +773,7 @@ class TilePositionEmbedding(nn.Module):
         if embed is not None:
             # reshape the weights to the correct shape
             nt_old, nt_old, _, w = embed.shape
-            logging.info(f"Resizing tile embedding from {nt_old}x{nt_old} to {self.num_tiles}x{self.num_tiles}")
+            log.info(f"Resizing tile embedding from {nt_old}x{nt_old} to {self.num_tiles}x{self.num_tiles}")
             embed_new = TilePositionEmbedding._dynamic_resize(embed, self.num_tiles)
             # assign the weights to the module
             state_dict[prefix + "embedding"] = embed_new
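The recurring change in this file, as in most of the 93 files touched, is the move from module-level stdlib loggers to the project's `get_logger` wrapper, which takes a `name` and a `category`. A minimal sketch of the pattern as it appears in these hunks (the example function and the remark about categories are illustrative, not part of the diff):

```python
# Before (removed in this commit):
# import logging
# logger = logging.getLogger(__name__)

# After, as added in the hunks above:
from llama_stack.log import get_logger

log = get_logger(name=__name__, category="core")


def example_call_site() -> None:
    # The category ("core", "inference", ...) presumably lets log levels be tuned
    # per subsystem rather than per module; this function is illustrative only.
    log.info("initialized example module")
```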
@@ -5,7 +5,7 @@
 # the root directory of this source tree.

 from collections.abc import Collection, Iterator, Sequence, Set
-from logging import getLogger
+from logging import getLogger # allow-direct-logging
 from pathlib import Path
 from typing import (
     Literal,
@@ -11,7 +11,7 @@ from llama_stack.log import get_logger

 from ..datatypes import BuiltinTool, RecursiveType, ToolCall, ToolPromptFormat

-logger = get_logger(name=__name__, category="inference")
+log = get_logger(name=__name__, category="inference")

 BUILTIN_TOOL_PATTERN = r'\b(?P<tool_name>\w+)\.call\(query="(?P<query>[^"]*)"\)'
 CUSTOM_TOOL_CALL_PATTERN = re.compile(r"<function=(?P<function_name>[^}]+)>(?P<args>{.*?})")

@@ -215,7 +215,7 @@ class ToolUtils:
             # FIXME: Enable multiple tool calls
             return function_calls[0]
         else:
-            logger.debug(f"Did not parse tool call from message body: {message_body}")
+            log.debug(f"Did not parse tool call from message body: {message_body}")
             return None

     @staticmethod
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import logging
 import os
 from collections.abc import Callable

@@ -13,11 +12,13 @@ from fairscale.nn.model_parallel.initialize import get_model_parallel_rank
 from torch import Tensor, nn
 from torch.nn import functional as F

+from llama_stack.log import get_logger
+
 from ...datatypes import QuantizationMode
 from ..model import Transformer, TransformerBlock
 from ..moe import MoE

-log = logging.getLogger(__name__)
+logger = get_logger(__name__, category="core")


 def swiglu_wrapper_no_reduce(

@@ -186,7 +187,7 @@ def logging_callbacks(
         if use_rich_progress:
             console.print(message)
         elif rank == 0: # Only log from rank 0 for non-rich logging
-            log.info(message)
+            logger.info(message)

     total_blocks = sum(1 for _, block in model.named_modules() if should_quantize_block(block))
     progress = None

@@ -220,6 +221,6 @@ def logging_callbacks(
         if completed is not None:
             progress.update(task_id, completed=completed)
         elif rank == 0 and completed and completed % 10 == 0:
-            log.info(f"Rank {rank}: {completed}/{total_blocks} blocks completed")
+            logger.info(f"Rank {rank}: {completed}/{total_blocks} blocks completed")

     return progress, log_status, update_status
@@ -5,7 +5,7 @@
 # the root directory of this source tree.

 from collections.abc import Collection, Iterator, Sequence, Set
-from logging import getLogger
+from logging import getLogger # allow-direct-logging
 from pathlib import Path
 from typing import (
     Literal,
@@ -6,16 +6,17 @@

 # type: ignore
 import collections
-import logging

-log = logging.getLogger(__name__)
+from llama_stack.log import get_logger
+
+logger = get_logger(__name__, category="core")

 try:
     import fbgemm_gpu.experimental.gen_ai  # noqa: F401

-    log.info("Using efficient FP8 or INT4 operators in FBGEMM.")
+    logger.info("Using efficient FP8 or INT4 operators in FBGEMM.")
 except ImportError:
-    log.error("No efficient FP8 or INT4 operators. Please install FBGEMM.")
+    logger.error("No efficient FP8 or INT4 operators. Please install FBGEMM.")
     raise

 import torch
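The FP8 module above keeps its guarded FBGEMM import, logging success and re-raising on failure. A generic sketch of that guarded optional-import pattern, reusing the `get_logger` call from the diff but with a placeholder package name (`fast_kernels` is hypothetical, not part of this commit):

```python
# Generic sketch of the guarded optional-import pattern shown above;
# "fast_kernels" is a placeholder dependency, not something this commit introduces.
from llama_stack.log import get_logger

logger = get_logger(__name__, category="core")

try:
    import fast_kernels  # noqa: F401  # hypothetical optional accelerator package

    logger.info("Using accelerated kernels.")
except ImportError:
    # Fail loudly rather than silently falling back: downstream code assumes the fast path.
    logger.error("Accelerated kernels unavailable; please install the optional dependency.")
    raise
```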