From f85743dcca1e8d594d7c54c05d52224128db3682 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Sun, 28 Sep 2025 15:48:29 -0700 Subject: [PATCH] Add nvidia model cache --- llama_stack/providers/remote/inference/nvidia/nvidia.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 1fc6a23b1..f6fca4014 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -108,8 +108,10 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): 1. Dynamic models from https://integrate.api.nvidia.com/v1/models 2. Static rerank models (which use different API endpoints) """ - models = await super().list_models() or [] + self._model_cache = {} + models = await super().list_models() + # Add rerank models existing_ids = {m.identifier for m in models} for model_id, _ in self._rerank_model_endpoints.items(): if self.allowed_models and model_id not in self.allowed_models: