From 085126e5301759b4b206b9b370afe3bb9b1dc685 Mon Sep 17 00:00:00 2001 From: Ken Dreyer Date: Fri, 21 Nov 2025 16:12:01 -0500 Subject: [PATCH] fix: update hard-coded google model names (#4212) When we send the model names to Google's openai API, we must use the "google" name prefix. Google does not recognize the "vertexai" model names. Closes #4211 ```bash uv venv --python python312 . .venv/bin/activate llama stack list-deps starter | xargs -L1 uv pip install llama stack run starter ``` Test that this shows the gemini models with their correct names: ```bash curl http://127.0.0.1:8321/v1/models | jq '.data | map(select(.custom_metadata.provider_id == "vertexai"))' ``` Test that this chat completion works: ```bash curl -X POST -H "Content-Type: application/json" "http://127.0.0.1:8321/v1/chat/completions" -d '{ "model": "vertexai/google/gemini-2.5-flash", "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "Hello! Can you tell me a joke?" } ], "temperature": 1.0, "max_tokens": 256 }' ``` (cherry picked from commit dabebdd2303af1401c5dd9d92654b530c4db5050) Signed-off-by: Charlie Doern --- .../providers/remote/inference/vertexai/vertexai.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/llama_stack/providers/remote/inference/vertexai/vertexai.py b/llama_stack/providers/remote/inference/vertexai/vertexai.py index 647c8c752..7941f8c89 100644 --- a/llama_stack/providers/remote/inference/vertexai/vertexai.py +++ b/llama_stack/providers/remote/inference/vertexai/vertexai.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from collections.abc import Iterable import google.auth.transport.requests from google.auth import default @@ -42,3 +43,12 @@ class VertexAIInferenceAdapter(OpenAIMixin): Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai """ return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi" + + async def list_provider_model_ids(self) -> Iterable[str]: + """ + VertexAI doesn't currently offer a way to query a list of available models from Google's Model Garden + For now we return a hardcoded version of the available models + + :return: An iterable of model IDs + """ + return ["google/gemini-2.0-flash", "google/gemini-2.5-flash", "google/gemini-2.5-pro"]