mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-03 17:29:01 +00:00
move is_hosted out of the NVIDIAConfig api
This commit is contained in:
parent
988741c276
commit
8944491c3c
3 changed files with 7 additions and 7 deletions
|
@ -46,7 +46,3 @@ class NVIDIAConfig(BaseModel):
|
||||||
default=60,
|
default=60,
|
||||||
description="Timeout for the HTTP requests",
|
description="Timeout for the HTTP requests",
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
|
||||||
def is_hosted(self) -> bool:
|
|
||||||
return "integrate.api.nvidia.com" in self.url
|
|
||||||
|
|
|
@ -40,7 +40,7 @@ from ._openai_utils import (
|
||||||
convert_openai_chat_completion_choice,
|
convert_openai_chat_completion_choice,
|
||||||
convert_openai_chat_completion_stream,
|
convert_openai_chat_completion_stream,
|
||||||
)
|
)
|
||||||
from ._utils import check_health
|
from ._utils import _is_nvidia_hosted, check_health
|
||||||
|
|
||||||
_MODEL_ALIASES = [
|
_MODEL_ALIASES = [
|
||||||
build_model_alias_with_just_provider_model_id(
|
build_model_alias_with_just_provider_model_id(
|
||||||
|
@ -91,7 +91,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
||||||
|
|
||||||
print(f"Initializing NVIDIAInferenceAdapter({config.url})...")
|
print(f"Initializing NVIDIAInferenceAdapter({config.url})...")
|
||||||
|
|
||||||
if config.is_hosted:
|
if _is_nvidia_hosted(config):
|
||||||
if not config.api_key:
|
if not config.api_key:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"API key is required for hosted NVIDIA NIM. "
|
"API key is required for hosted NVIDIA NIM. "
|
||||||
|
|
|
@ -11,6 +11,10 @@ import httpx
|
||||||
from ._config import NVIDIAConfig
|
from ._config import NVIDIAConfig
|
||||||
|
|
||||||
|
|
||||||
|
def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
|
||||||
|
return "integrate.api.nvidia.com" in config.url
|
||||||
|
|
||||||
|
|
||||||
async def _get_health(url: str) -> Tuple[bool, bool]:
|
async def _get_health(url: str) -> Tuple[bool, bool]:
|
||||||
"""
|
"""
|
||||||
Query {url}/v1/health/{live,ready} to check if the server is running and ready
|
Query {url}/v1/health/{live,ready} to check if the server is running and ready
|
||||||
|
@ -37,7 +41,7 @@ async def check_health(config: NVIDIAConfig) -> None:
|
||||||
Raises:
|
Raises:
|
||||||
RuntimeError: If the server is not running or ready
|
RuntimeError: If the server is not running or ready
|
||||||
"""
|
"""
|
||||||
if not config.is_hosted:
|
if not _is_nvidia_hosted(config):
|
||||||
print("Checking NVIDIA NIM health...")
|
print("Checking NVIDIA NIM health...")
|
||||||
try:
|
try:
|
||||||
is_live, is_ready = await _get_health(config.url)
|
is_live, is_ready = await _get_health(config.url)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue