Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 04:04:14 +00:00)
Add nvidia model cache
commit f85743dcca
parent cf386ad8f8

1 changed file with 3 additions and 1 deletion
@@ -108,8 +108,10 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
         1. Dynamic models from https://integrate.api.nvidia.com/v1/models
         2. Static rerank models (which use different API endpoints)
         """
-        models = await super().list_models() or []
+        self._model_cache = {}
+        models = await super().list_models()
+
         # Add rerank models
         existing_ids = {m.identifier for m in models}
         for model_id, _ in self._rerank_model_endpoints.items():
             if self.allowed_models and model_id not in self.allowed_models:
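For context, below is a minimal, self-contained sketch of the pattern this change follows: reset a per-adapter model cache, fetch the dynamic model list from the parent mixin, then merge in the static rerank models that the dynamic endpoint cannot report. Only the lines visible in the hunk above are confirmed by this diff; the `Model` shape, the duplicate guard, the cache insertion, the return statement, and the example model/endpoint names are assumptions added for illustration.

```python
# Hypothetical, simplified stand-ins for the real llama-stack types; only
# the control flow around the cache mirrors the hunk above.
import asyncio
from dataclasses import dataclass


@dataclass
class Model:
    identifier: str
    model_type: str = "llm"


class OpenAIMixin:
    """Stand-in for the mixin whose list_models() serves the dynamic
    models from https://integrate.api.nvidia.com/v1/models."""

    async def list_models(self) -> list[Model]:
        return [Model("meta/llama-3.1-8b-instruct")]


class NVIDIAInferenceAdapter(OpenAIMixin):
    # Static rerank models use different API endpoints, so the dynamic
    # listing never includes them (id and endpoint here are placeholders).
    _rerank_model_endpoints = {"nvidia/example-rerank-model": "/v1/ranking"}
    allowed_models: list[str] | None = None

    async def list_models(self) -> list[Model]:
        # The line this commit adds: start from a fresh cache on every
        # listing so entries from a previous call cannot linger.
        self._model_cache: dict[str, Model] = {}
        models = await super().list_models()

        # Add rerank models
        existing_ids = {m.identifier for m in models}
        for model_id, _ in self._rerank_model_endpoints.items():
            if self.allowed_models and model_id not in self.allowed_models:
                continue
            if model_id not in existing_ids:  # assumed duplicate guard
                model = Model(model_id, model_type="rerank")
                self._model_cache[model_id] = model  # assumed cache insert
                models.append(model)
        return models


if __name__ == "__main__":
    # Prints the dynamic model plus the merged static rerank model.
    print(asyncio.run(NVIDIAInferenceAdapter().list_models()))
```

Resetting `_model_cache` at the top of the override presumably ensures the rerank entries are rebuilt on every refresh rather than accumulating across calls; that rationale is inferred from the commit title, not stated in the diff.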