mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-03 19:57:35 +00:00
Add nvidia model cache
This commit is contained in:
parent
cf386ad8f8
commit
f85743dcca
1 changed file with 3 additions and 1 deletion
|
@@ -108,8 +108,10 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
|||
1. Dynamic models from https://integrate.api.nvidia.com/v1/models
|
||||
2. Static rerank models (which use different API endpoints)
|
||||
"""
|
||||
models = await super().list_models() or []
|
||||
self._model_cache = {}
|
||||
models = await super().list_models()
|
||||
|
||||
# Add rerank models
|
||||
existing_ids = {m.identifier for m in models}
|
||||
for model_id, _ in self._rerank_model_endpoints.items():
|
||||
if self.allowed_models and model_id not in self.allowed_models:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue