mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-04 02:03:44 +00:00
# What does this PR do?
Partial revert of b67aef2.
Vertex AI doesn't offer an endpoint for listing models from Google's
Model Garden, so this change returns to hardcoded values until such an
endpoint is available.
Closes #3988
## Test Plan
Server side, set up your Vertex AI env vars (`VERTEX_AI_PROJECT`,
`VERTEX_AI_LOCATION`, and `GOOGLE_APPLICATION_CREDENTIALS`) and run the
starter distribution
```bash
$ llama stack list-deps starter | xargs -L1 uv pip install
$ llama stack run starter
```
Client side, the formerly broken cURL requests now work:
```bash
$ curl http://127.0.0.1:8321/v1/models | jq '.data | map(select(.provider_id == "vertexai"))'
[
{
"identifier": "vertexai/vertex_ai/gemini-2.0-flash",
"provider_resource_id": "vertex_ai/gemini-2.0-flash",
"provider_id": "vertexai",
"type": "model",
"metadata": {},
"model_type": "llm"
},
{
"identifier": "vertexai/vertex_ai/gemini-2.5-flash",
"provider_resource_id": "vertex_ai/gemini-2.5-flash",
"provider_id": "vertexai",
"type": "model",
"metadata": {},
"model_type": "llm"
},
{
"identifier": "vertexai/vertex_ai/gemini-2.5-pro",
"provider_resource_id": "vertex_ai/gemini-2.5-pro",
"provider_id": "vertexai",
"type": "model",
"metadata": {},
"model_type": "llm"
}
]
$ curl -fsS http://127.0.0.1:8321/v1/openai/v1/chat/completions -H "Content-Type: application/json" \
  -d "{\"model\": \"vertexai/vertex_ai/gemini-2.5-flash\", \"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"max_tokens\": 128, \"temperature\": 0.0}" | jq
{
"id": "p8oIaYiQF8_PptQPo-GH8QQ",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello there! How can I help you today?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
...
```
Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
54 lines
2.1 KiB
Python
54 lines
2.1 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
from collections.abc import Iterable
|
|
|
|
import google.auth.transport.requests
|
|
from google.auth import default
|
|
|
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|
|
|
from .config import VertexAIConfig
|
|
|
|
|
|
class VertexAIInferenceAdapter(OpenAIMixin):
    """Inference adapter that talks to Vertex AI via its OpenAI-compatible endpoint."""

    # Provider configuration; this class reads ``location`` and ``project`` from it.
    config: VertexAIConfig

    # Field name consumed by the OpenAIMixin base class.
    # NOTE(review): presumably the provider-data key that carries the credential — confirm in OpenAIMixin.
    provider_data_api_key_field: str = "vertex_project"

    def get_api_key(self) -> str:
        """
        Return an access token obtained from Application Default Credentials (ADC).

        Vertex AI authenticates with ADC rather than a static API key, so the
        "API key" handed to the OpenAI-compatible client is a freshly refreshed
        access token.

        :return: The access token as a string, or an empty string when the
            credentials could not be loaded or refreshed
        """
        cloud_platform_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
        try:
            # Default credentials lookup - will read from GOOGLE_APPLICATION_CREDENTIALS
            adc_credentials, _ = default(scopes=cloud_platform_scopes)
            auth_request = google.auth.transport.requests.Request()
            adc_credentials.refresh(auth_request)
            access_token = str(adc_credentials.token)
            return access_token
        except Exception:
            # Best-effort: when no token can be obtained, hand back an empty string
            # so the environment can work with ADC directly
            return ""

    def get_base_url(self) -> str:
        """
        Build the base URL of the Vertex AI OpenAI-compatible API.

        The URL is derived from the ``location`` and ``project`` values of the
        adapter configuration.
        Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai

        :return: The OpenAI-compatible endpoint URL for the configured project and location
        """
        location = self.config.location
        project = self.config.project
        return f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/openapi"

    async def list_provider_model_ids(self) -> Iterable[str]:
        """
        Return the model IDs this provider exposes.

        VertexAI doesn't currently offer a way to query a list of available models
        from Google's Model Garden, so a hardcoded list is returned for now.

        :return: An iterable of model IDs
        """
        hardcoded_model_ids = [
            "vertexai/gemini-2.0-flash",
            "vertexai/gemini-2.5-flash",
            "vertexai/gemini-2.5-pro",
        ]
        return hardcoded_model_ids
|