feat: add auto-generated CI documentation pre-commit hook (#2890)

Our CI is entirely undocumented; this commit adds a README.md file with
a table of the current CI workflows and what each one does.
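
The hook itself is not shown in this excerpt, so as a rough illustration only: a minimal sketch of the kind of generator script such a pre-commit hook might run, assuming GitHub Actions workflows under .github/workflows (the function name and paths are hypothetical, not the actual hook from this commit):

import sys
from pathlib import Path

import yaml  # PyYAML

def generate_ci_readme(workflows_dir: str = ".github/workflows") -> str:
    """Render a markdown table of workflow names and their files (hypothetical)."""
    rows = []
    for wf in sorted(Path(workflows_dir).glob("*.y*ml")):  # matches .yml and .yaml
        spec = yaml.safe_load(wf.read_text()) or {}
        rows.append(f"| {spec.get('name', wf.stem)} | {wf.name} |")
    return "\n".join(["| Name | File |", "| --- | --- |", *rows])

if __name__ == "__main__":
    out = Path(".github/workflows/README.md")
    out.write_text(generate_ci_readme() + "\n")
    print(f"wrote {out}", file=sys.stderr)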

---------

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
Nathan Weinberg 2025-07-25 11:57:01 -04:00 committed by Mustafa Elbehery
parent 7f834339ba
commit b381ed6d64
93 changed files with 495 additions and 477 deletions


@@ -65,7 +65,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 from .config import FireworksImplConfig
 from .models import MODEL_ENTRIES

-logger = get_logger(name=__name__, category="inference")
+log = get_logger(name=__name__, category="inference")


 class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
@@ -256,7 +256,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
             "stream": bool(request.stream),
             **self._build_options(request.sampling_params, request.response_format, request.logprobs),
         }
-        logger.debug(f"params to fireworks: {params}")
+        log.debug(f"params to fireworks: {params}")
         return params
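
Both hunks in this file are the same mechanical rename: the module-level logger moves from the name logger to log, still built by llama_stack's get_logger helper with a category tag. A minimal sketch of the resulting pattern (the build_params function is illustrative, not from the diff):

from llama_stack.log import get_logger

# One module-level logger per file, tagged with a subsystem category.
log = get_logger(name=__name__, category="inference")

def build_params(stream: bool = False) -> dict:
    params = {"stream": stream}
    log.debug(f"params to provider: {params}")  # request payloads are logged at debug level
    return params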


@@ -3,7 +3,6 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-import logging

 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
@@ -11,8 +10,6 @@ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .models import MODEL_ENTRIES

-logger = logging.getLogger(__name__)
-
 class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     """


@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import logging
 import warnings
 from collections.abc import AsyncIterator
@@ -33,6 +32,7 @@ from llama_stack.apis.inference import (
     ToolChoice,
     ToolConfig,
 )
+from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
@@ -54,7 +54,7 @@ from .openai_utils import (
 )
 from .utils import _is_nvidia_hosted

-logger = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="inference")


 class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper):
@@ -75,7 +75,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper):
         # TODO(mf): filter by available models
         ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)

-        logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")
+        log.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")

         if _is_nvidia_hosted(config):
             if not config.api_key:


@@ -4,13 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import logging

 import httpx

+from llama_stack.log import get_logger
+
 from . import NVIDIAConfig

-logger = logging.getLogger(__name__)
+log = get_logger(name=__name__, category="inference")


 def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
@@ -44,7 +45,7 @@ async def check_health(config: NVIDIAConfig) -> None:
         RuntimeError: If the server is not running or ready
     """
     if not _is_nvidia_hosted(config):
-        logger.info("Checking NVIDIA NIM health...")
+        log.info("Checking NVIDIA NIM health...")
         try:
             is_live, is_ready = await _get_health(config.url)
             if not is_live:
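
Per the docstring context above, check_health raises RuntimeError when the server is not live or ready, and the check is skipped for NVIDIA-hosted endpoints. A usage sketch under those assumptions; the import paths are inferred from the diff (from . import NVIDIAConfig) rather than confirmed:

import asyncio

from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
from llama_stack.providers.remote.inference.nvidia.utils import check_health

async def main() -> None:
    config = NVIDIAConfig(url="http://localhost:8000")  # hypothetical self-hosted NIM
    try:
        await check_health(config)  # raises RuntimeError if the NIM is not live/ready
        print("NIM is live and ready")
    except RuntimeError as exc:
        print(f"NIM health check failed: {exc}")

asyncio.run(main())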


@@ -85,7 +85,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 from .models import MODEL_ENTRIES

-logger = get_logger(name=__name__, category="inference")
+log = get_logger(name=__name__, category="inference")


 class OllamaInferenceAdapter(
@@ -117,10 +117,10 @@ class OllamaInferenceAdapter(
         return self._openai_client

     async def initialize(self) -> None:
-        logger.info(f"checking connectivity to Ollama at `{self.config.url}`...")
+        log.info(f"checking connectivity to Ollama at `{self.config.url}`...")
         health_response = await self.health()

         if health_response["status"] == HealthStatus.ERROR:
-            logger.warning(
+            log.warning(
                 "Ollama Server is not running, make sure to start it using `ollama serve` in a separate terminal"
             )
@@ -339,7 +339,7 @@ class OllamaInferenceAdapter(
             "options": sampling_options,
             "stream": request.stream,
         }
-        logger.debug(f"params to ollama: {params}")
+        log.debug(f"params to ollama: {params}")
         return params
@@ -437,7 +437,7 @@ class OllamaInferenceAdapter(
         if provider_resource_id not in available_models:
             available_models_latest = [m.model.split(":latest")[0] for m in response.models]
             if provider_resource_id in available_models_latest:
-                logger.warning(
+                log.warning(
                     f"Imprecise provider resource id was used but 'latest' is available in Ollama - using '{model.provider_resource_id}:latest'"
                 )
                 return model
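
The last hunk above is the ':latest' fallback during model lookup: when the exact provider resource id is not among the served models, the tag-stripped names are checked and the ':latest' tag is substituted with a warning. A standalone sketch of that matching logic (the function name and inputs are illustrative):

def resolve_ollama_model(provider_resource_id: str, served_models: list[str]) -> str | None:
    """Prefer an exact match; otherwise fall back to the ':latest' tag."""
    if provider_resource_id in served_models:
        return provider_resource_id
    stripped = [name.split(":latest")[0] for name in served_models]
    if provider_resource_id in stripped:
        # Imprecise id, but Ollama serves a ':latest' tag for it.
        return f"{provider_resource_id}:latest"
    return None

# e.g. resolve_ollama_model("llama3.1", ["llama3.1:latest"]) -> "llama3.1:latest"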


@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import logging

 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@@ -12,8 +11,6 @@ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import OpenAIConfig
 from .models import MODEL_ENTRIES

-logger = logging.getLogger(__name__)
-
 #
 # This OpenAI adapter implements Inference methods using two mixins -


@@ -5,7 +5,6 @@
 # the root directory of this source tree.

-import logging
 from collections.abc import AsyncGenerator

 from huggingface_hub import AsyncInferenceClient, HfApi
@@ -34,6 +33,7 @@ from llama_stack.apis.inference import (
     ToolPromptFormat,
 )
 from llama_stack.apis.models import Model
+from llama_stack.log import get_logger
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import (
@@ -58,7 +58,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig

-log = logging.getLogger(__name__)
+logger = get_logger(__name__, category="core")


 def build_hf_repo_model_entries():
@@ -307,7 +307,7 @@ class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
         if not config.url:
             raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
-        log.info(f"Initializing TGI client with url={config.url}")
+        logger.info(f"Initializing TGI client with url={config.url}")
         self.client = AsyncInferenceClient(
             model=config.url,
         )
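
Here initialize now fails fast when no URL is configured and logs through the shared logger. For reference, a small client-side sketch using the same huggingface_hub constructor the adapter calls; the endpoint URL is illustrative:

import asyncio

from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    url = "http://localhost:8080"  # hypothetical TGI endpoint, i.e. TGI_URL
    client = AsyncInferenceClient(model=url)  # same constructor call as the adapter
    text = await client.text_generation("Say hello.", max_new_tokens=16)
    print(text)

asyncio.run(main())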


@@ -61,7 +61,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 from .config import TogetherImplConfig
 from .models import MODEL_ENTRIES

-logger = get_logger(name=__name__, category="inference")
+log = get_logger(name=__name__, category="inference")


 class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
@@ -232,7 +232,7 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
             "stream": request.stream,
             **self._build_options(request.sampling_params, request.logprobs, request.response_format),
         }
-        logger.debug(f"params to together: {params}")
+        log.debug(f"params to together: {params}")
         return params

     async def embeddings(