From 62b3ad349a61794fd414439ee12e3e62e7688f8b Mon Sep 17 00:00:00 2001
From: Nathan Weinberg <31703736+nathan-weinberg@users.noreply.github.com>
Date: Mon, 3 Nov 2025 20:38:16 -0500
Subject: [PATCH] fix: return to hardcoded model IDs for Vertex AI (#4041)

# What does this PR do?
partial revert of b67aef2

Vertex AI doesn't offer an endpoint for listing models from Google's
Model Garden

Return to hardcoded values until such an endpoint is available

Closes #3988

## Test Plan
Server side, set up your Vertex AI env vars (`VERTEX_AI_PROJECT`,
`VERTEX_AI_LOCATION`, and `GOOGLE_APPLICATION_CREDENTIALS`) and run the
starter distribution

```bash
$ llama stack list-deps starter | xargs -L1 uv pip install
$ llama stack run starter
```

Client side, formerly broken cURL requests now working

```bash
$ curl http://127.0.0.1:8321/v1/models | jq '.data | map(select(.provider_id == "vertexai"))'
[
  {
    "identifier": "vertexai/vertex_ai/gemini-2.0-flash",
    "provider_resource_id": "vertex_ai/gemini-2.0-flash",
    "provider_id": "vertexai",
    "type": "model",
    "metadata": {},
    "model_type": "llm"
  },
  {
    "identifier": "vertexai/vertex_ai/gemini-2.5-flash",
    "provider_resource_id": "vertex_ai/gemini-2.5-flash",
    "provider_id": "vertexai",
    "type": "model",
    "metadata": {},
    "model_type": "llm"
  },
  {
    "identifier": "vertexai/vertex_ai/gemini-2.5-pro",
    "provider_resource_id": "vertex_ai/gemini-2.5-pro",
    "provider_id": "vertexai",
    "type": "model",
    "metadata": {},
    "model_type": "llm"
  }
]
$ curl -fsS http://127.0.0.1:8321/v1/openai/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"vertexai/vertex_ai/gemini-2.5-flash\", \"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"max_tokens\": 128, \"temperature\": 0.0}" | jq
{
  "id": "p8oIaYiQF8_PptQPo-GH8QQ",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": "Hello there! How can I help you today?",
        "refusal": null,
        "role": "assistant",
        "annotations": null,
        "audio": null,
        "function_call": null,
        "tool_calls": null
      }
    }
  ],
  ...
```

Signed-off-by: Nathan Weinberg
---
 .../providers/remote/inference/vertexai/vertexai.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/llama_stack/providers/remote/inference/vertexai/vertexai.py b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py
index 647c8c752..b91430fd0 100644
--- a/src/llama_stack/providers/remote/inference/vertexai/vertexai.py
+++ b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from collections.abc import Iterable
 import google.auth.transport.requests
 from google.auth import default
 
@@ -42,3 +43,12 @@ class VertexAIInferenceAdapter(OpenAIMixin):
         Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
         """
         return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
+
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        """
+        VertexAI doesn't currently offer a way to query a list of available models from Google's Model Garden
+        For now we return a hardcoded version of the available models
+
+        :return: An iterable of model IDs
+        """
+        return ["vertexai/gemini-2.0-flash", "vertexai/gemini-2.5-flash", "vertexai/gemini-2.5-pro"]