Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-03 19:57:35 +00:00
feat: implement graceful model discovery for vLLM provider
- Attempt model discovery first for backward compatibility
- If discovery fails and refresh_models=false, continue without error
- If discovery fails and refresh_models=true, fail hard with ValueError
- Supports dynamic token authentication scenarios

Fixes OAuth authentication issues when vLLM service requires dynamic tokens
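The change boils down to the control flow sketched below (the actual diff follows further down). This is a minimal, self-contained simulation, not the adapter code: list_models_unreachable, verify_model, and the model id are illustrative stand-ins, while the refresh_models branching and the log messages mirror the diff.

import asyncio
import logging

logging.basicConfig(level=logging.WARNING)
log = logging.getLogger("vllm-model-check")


async def list_models_unreachable():
    # Stand-in for client.models.list(): the endpoint wants a dynamically
    # issued OAuth token at request time, so listing models up front fails.
    raise ConnectionError("401: static token rejected by vLLM endpoint")
    yield  # unreachable; marks this as an async generator, like the OpenAI paginator


async def verify_model(model_id: str, refresh_models: bool) -> str:
    # Sketch of the commit's logic, not the adapter's real method signature.
    try:
        available = [m async for m in list_models_unreachable()]
        if model_id not in available:
            raise ValueError(f"Model {model_id} is not being served by vLLM.")
    except Exception as e:
        if refresh_models:
            # refresh_models=true: discovery is required, so fail hard.
            raise ValueError(f"Model verification failed: {e}") from e
        # refresh_models=false: log and continue without live verification.
        log.warning(f"Model verification failed for model {model_id} with error {e}")
        log.warning("Continuing without live check (refresh_models=false).")
    return model_id


async def main():
    # Graceful path: the model is accepted even though discovery failed.
    print(await verify_model("my-model", refresh_models=False))
    # Strict path: the same discovery failure surfaces as a ValueError.
    try:
        await verify_model("my-model", refresh_models=True)
    except ValueError as e:
        print(f"raised as expected: {e}")


asyncio.run(main())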
This commit is contained in:
parent 188a56af5c
commit 2b54b57145

1 changed file with 13 additions and 11 deletions
@@ -9,7 +9,6 @@ from typing import Any
 from urllib.parse import urljoin
 
 import httpx
-from openai import APIConnectionError
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
@@ -339,16 +338,19 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro
             pass  # Ignore statically unknown model, will check live listing
         try:
             res = self.client.models.list()
-        except APIConnectionError as e:
-            raise ValueError(
-                f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
-            ) from e
-        available_models = [m.id async for m in res]
-        if model.provider_resource_id not in available_models:
-            raise ValueError(
-                f"Model {model.provider_resource_id} is not being served by vLLM. "
-                f"Available models: {', '.join(available_models)}"
-            )
+            available_models = [m.id async for m in res]
+            if model.provider_resource_id not in available_models:
+                raise ValueError(
+                    f"Model {model.provider_resource_id} is not being served by vLLM. "
+                    f"Available models: {', '.join(available_models)}"
+                )
+        except Exception as e:
+            if self.config.refresh_models:
+                raise ValueError(f"Model verification failed: {e}") from e
+            # if refresh_models is false, gracefully continue without verification
+            log.warning(f"Model verification failed for model {model.model_id} with error {e}")
+            log.warning("Continuing without live check (refresh_models=false).")
 
         return model
 
     async def _get_params(self, request: ChatCompletionRequest) -> dict:
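From the operator's side, the new flag is what selects between the two paths. The snippet below is only an illustration of that choice: url and refresh_models correspond to the self.config fields referenced in the diff, the endpoint value is a placeholder, and the dicts are not the provider's actual configuration schema.

# Illustration only: two hypothetical remote vLLM provider settings.
strict = {
    "url": "https://vllm.example.internal/v1",  # placeholder endpoint
    "refresh_models": True,   # discovery must succeed; failures raise ValueError
}
graceful = {
    "url": "https://vllm.example.internal/v1",  # placeholder endpoint
    "refresh_models": False,  # discovery failures are logged; the model check is skipped
}

With refresh_models left false, a vLLM endpoint that only accepts dynamically issued OAuth tokens no longer blocks the model check up front: the failure is logged and the model is accepted as configured.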