mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
Fix rerank model endpoint issue
This commit is contained in:
parent
f85743dcca
commit
2fb8756fe2
2 changed files with 24 additions and 8 deletions
|
@@ -138,10 +138,9 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
|||
provider_model_id = await self._get_provider_model_id(model)
|
||||
|
||||
ranking_url = self.get_base_url()
|
||||
model_obj = await self.model_store.get_model(model)
|
||||
|
||||
if _is_nvidia_hosted(self._config) and "endpoint" in model_obj.metadata:
|
||||
ranking_url = model_obj.metadata["endpoint"]
|
||||
if _is_nvidia_hosted(self._config) and provider_model_id in self._rerank_model_endpoints:
|
||||
ranking_url = self._rerank_model_endpoints[provider_model_id]
|
||||
|
||||
logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}")
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue