mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-17 05:52:36 +00:00
fix: handle provider registration failures gracefully
When a provider fails during model registration or listing, the stack should continue initializing rather than crashing. This allows the stack to start even if some providers are misconfigured.
- Added error handling in register_resources()
- Added unit tests to verify error handling behavior
- Improved error logging with provider context
- Removed @pytest.mark.asyncio decorators (pytest is already configured with asyncio_mode=auto)
Fixes #3769
This commit is contained in:
parent
e7d21e1ee3
commit
a271e3abae
2 changed files with 235 additions and 11 deletions
|
|
@ -102,6 +102,12 @@ TEST_RECORDING_CONTEXT = None
|
|||
|
||||
|
||||
async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
|
||||
"""Register resources from the run config with their respective providers.
|
||||
|
||||
This function attempts to register each resource (models, shields, etc.) with its provider.
|
||||
If a registration fails, it logs the error and continues with other resources rather than
|
||||
crashing the entire stack.
|
||||
"""
|
||||
for rsrc, api, register_method, list_method in RESOURCES:
|
||||
objects = getattr(run_config, rsrc)
|
||||
if api not in impls:
|
||||
|
|
@ -116,20 +122,31 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
|
|||
continue
|
||||
logger.debug(f"registering {rsrc.capitalize()} {obj} for provider {obj.provider_id}")
|
||||
|
||||
# we want to maintain the type information in arguments to method.
|
||||
# instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
|
||||
# we use model_dump() to find all the attrs and then getattr to get the still typed value.
|
||||
await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
|
||||
try:
|
||||
# we want to maintain the type information in arguments to method.
|
||||
# instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
|
||||
# we use model_dump() to find all the attrs and then getattr to get the still typed value.
|
||||
await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
|
||||
except Exception as e:
|
||||
# Log the error but continue with other resources
|
||||
logger.error(
|
||||
f"Failed to register {rsrc} {obj} for provider {obj.provider_id if hasattr(obj, 'provider_id') else 'unknown'}: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
method = getattr(impls[api], list_method)
|
||||
response = await method()
|
||||
try:
|
||||
method = getattr(impls[api], list_method)
|
||||
response = await method()
|
||||
|
||||
objects_to_process = response.data if hasattr(response, "data") else response
|
||||
objects_to_process = response.data if hasattr(response, "data") else response
|
||||
|
||||
for obj in objects_to_process:
|
||||
logger.debug(
|
||||
f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}",
|
||||
)
|
||||
for obj in objects_to_process:
|
||||
logger.debug(
|
||||
f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}",
|
||||
)
|
||||
except Exception as e:
|
||||
# Log the error but continue with other resource types
|
||||
logger.error(f"Failed to list {rsrc}: {e}")
|
||||
|
||||
|
||||
class EnvVarError(Exception):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue