Add nvidia model cache

2025-10-03 19:57:35 +00:00 · 2025-09-28 15:48:29 -07:00 · 2025-09-28 15:48:29 -07:00 · f85743dcca
commit f85743dcca
parent cf386ad8f8
1 changed file with 3 additions and 1 deletion
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -108,8 +108,10 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
        1. Dynamic models from https://integrate.api.nvidia.com/v1/models
        2. Static rerank models (which use different API endpoints)
        """
-        models = await super().list_models() or []
+        self._model_cache = {}
+        models = await super().list_models()

+        # Add rerank models
        existing_ids = {m.identifier for m in models}
        for model_id, _ in self._rerank_model_endpoints.items():
            if self.allowed_models and model_id not in self.allowed_models: