mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-29 23:34:17 +00:00
Merge branch 'main' into add-watsonx-inference-adapter
This commit is contained in:
commit
7eb83264ef
116 changed files with 2286 additions and 2719 deletions
|
|
@ -55,7 +55,7 @@ from .openai_utils import (
|
|||
convert_openai_completion_choice,
|
||||
convert_openai_completion_stream,
|
||||
)
|
||||
from .utils import _is_nvidia_hosted, check_health
|
||||
from .utils import _is_nvidia_hosted
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -134,7 +134,9 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
if content_has_media(content):
|
||||
raise NotImplementedError("Media is not supported")
|
||||
|
||||
await check_health(self._config) # this raises errors
|
||||
# ToDo: check health of NeMo endpoints and enable this
|
||||
# removing this health check as NeMo customizer endpoint health check is returning 404
|
||||
# await check_health(self._config) # this raises errors
|
||||
|
||||
provider_model_id = self.get_provider_model_id(model_id)
|
||||
request = convert_completion_request(
|
||||
|
|
@ -236,7 +238,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
if tool_prompt_format:
|
||||
warnings.warn("tool_prompt_format is not supported by NVIDIA NIM, ignoring", stacklevel=2)
|
||||
|
||||
await check_health(self._config) # this raises errors
|
||||
# await check_health(self._config) # this raises errors
|
||||
|
||||
provider_model_id = self.get_provider_model_id(model_id)
|
||||
request = await convert_chat_completion_request(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue