Merge branch 'main' into add-watsonx-inference-adapter

2025-12-29 23:34:17 +00:00 · 2025-03-27 09:40:51 +05:30 · 2025-03-27 09:40:51 +05:30 · 7eb83264ef
commit 7eb83264ef
parent 4b53171139 193e531216
116 changed files with 2286 additions and 2719 deletions
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@ -55,7 +55,7 @@ from .openai_utils import (
    convert_openai_completion_choice,
    convert_openai_completion_stream,
 )
-from .utils import _is_nvidia_hosted, check_health
+from .utils import _is_nvidia_hosted

 logger = logging.getLogger(__name__)

@ -134,7 +134,9 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
        if content_has_media(content):
            raise NotImplementedError("Media is not supported")

-        await check_health(self._config)  # this raises errors
+        # TODO: re-enable this health check once the NeMo endpoints report health correctly.
+        # It is disabled for now because the NeMo customizer endpoint health check returns 404.
+        # await check_health(self._config)  # this raises errors

        provider_model_id = self.get_provider_model_id(model_id)
        request = convert_completion_request(
@ -236,7 +238,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
        if tool_prompt_format:
            warnings.warn("tool_prompt_format is not supported by NVIDIA NIM, ignoring", stacklevel=2)

-        await check_health(self._config)  # this raises errors
+        # await check_health(self._config)  # this raises errors

        provider_model_id = self.get_provider_model_id(model_id)
        request = await convert_chat_completion_request(