Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-12 20:12:33 +00:00
fix: allow skipping model availability check for vLLM
parent 5d711d4bcb
commit f20eb57bef
1 changed file with 16 additions and 0 deletions
@@ -77,6 +77,22 @@ class VLLMInferenceAdapter(OpenAIMixin):
     def get_extra_client_params(self):
         return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
 
+    async def check_model_availability(self, model: str) -> bool:
+        """
+        Check model availability only when api_token is configured.
+        Skip the check when running without authentication.
+        """
+        if self.config.api_token:
+            # If we have a token, perform the normal availability check
+            try:
+                return model in [m.id async for m in self.client.models.list()]
+            except Exception as e:
+                # If listing models fails, log the error but allow the model
+                log.warning(f"Failed to check model availability: {e}")
+                return True
+        # Without a token, skip the check to avoid OAuth redirects
+        return True
+
     async def openai_chat_completion(
         self,
         model: str,
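For context, below is a minimal standalone sketch of the token-gated behavior this patch introduces. It is not the adapter's real class: _AvailabilitySketch, _FakeConfig, and list_model_ids are hypothetical stand-ins for VLLMInferenceAdapter, its config, and the self.client.models.list() call, used only to illustrate how the check is skipped when no api_token is set and falls back to True when listing models fails.

import asyncio
import logging
from dataclasses import dataclass

log = logging.getLogger(__name__)


@dataclass
class _FakeConfig:
    # Hypothetical stand-in for the adapter config; only the field used here.
    api_token: str | None = None


class _AvailabilitySketch:
    """Standalone mock mirroring the token-gated availability check."""

    def __init__(self, config: _FakeConfig, known_models: list[str]):
        self.config = config
        self._known_models = known_models

    async def list_model_ids(self) -> list[str]:
        # Stand-in for the `self.client.models.list()` call; a real request can
        # fail (e.g. be redirected to an OAuth login) when the endpoint needs auth.
        if not self.config.api_token:
            raise RuntimeError("unauthenticated request redirected to OAuth login")
        return self._known_models

    async def check_model_availability(self, model: str) -> bool:
        if self.config.api_token:
            # With a token, consult the model list; on failure, warn and allow.
            try:
                return model in await self.list_model_ids()
            except Exception as e:
                log.warning(f"Failed to check model availability: {e}")
                return True
        # Without a token, skip the check entirely.
        return True


if __name__ == "__main__":
    # No token configured: the check is skipped and the model is accepted.
    adapter = _AvailabilitySketch(_FakeConfig(api_token=None), ["meta-llama/Llama-3.1-8B"])
    print(asyncio.run(adapter.check_model_availability("meta-llama/Llama-3.1-8B")))  # True

    # With a token, the model list is actually consulted.
    adapter = _AvailabilitySketch(_FakeConfig(api_token="secret"), ["meta-llama/Llama-3.1-8B"])
    print(asyncio.run(adapter.check_model_availability("other-model")))  # False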