mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
fix: add token to the openai request
OpenAIMixin expects to use an API key and creates its own AsyncOpenAI client. So our code now authenticates with the Google service, retrieves a token, and passes it to the OpenAI client. Falls back to an empty string if credentials can't be obtained (letting LiteLLM handle ADC directly). Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
parent
3442f8865c
commit
73e99b6eab
2 changed files with 28 additions and 5 deletions
|
@ -6,6 +6,9 @@
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import google.auth.transport.requests
|
||||||
|
from google.auth import default
|
||||||
|
|
||||||
from llama_stack.apis.inference import ChatCompletionRequest
|
from llama_stack.apis.inference import ChatCompletionRequest
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
|
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
|
||||||
LiteLLMOpenAIMixin,
|
LiteLLMOpenAIMixin,
|
||||||
|
@ -28,12 +31,29 @@ class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
def get_api_key(self) -> str:
|
def get_api_key(self) -> str:
|
||||||
# Vertex AI doesn't use API keys, it uses Application Default Credentials
|
"""
|
||||||
# Return empty string to let litellm handle authentication via ADC
|
Get an access token for Vertex AI using Application Default Credentials.
|
||||||
|
|
||||||
|
Vertex AI uses ADC instead of API keys. This method obtains an access token
|
||||||
|
from the default credentials and returns it for use with the OpenAI-compatible client.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get default credentials - will read from GOOGLE_APPLICATION_CREDENTIALS
|
||||||
|
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
|
||||||
|
credentials.refresh(google.auth.transport.requests.Request())
|
||||||
|
return credentials.token
|
||||||
|
except Exception:
|
||||||
|
# If we can't get credentials, return empty string to let LiteLLM handle it
|
||||||
|
# This allows the LiteLLM mixin to work with ADC directly
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def get_base_url(self):
|
def get_base_url(self) -> str:
|
||||||
# source - https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
|
"""
|
||||||
|
Get the Vertex AI OpenAI-compatible API base URL.
|
||||||
|
|
||||||
|
Returns the Vertex AI OpenAI-compatible endpoint URL.
|
||||||
|
Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
|
||||||
|
"""
|
||||||
return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
|
return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
|
||||||
|
|
||||||
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
||||||
|
|
|
@ -76,6 +76,9 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
|
||||||
"remote::gemini",
|
"remote::gemini",
|
||||||
# https://docs.anthropic.com/en/api/openai-sdk#simple-fields
|
# https://docs.anthropic.com/en/api/openai-sdk#simple-fields
|
||||||
"remote::anthropic",
|
"remote::anthropic",
|
||||||
|
"remote::vertexai",
|
||||||
|
# Error code: 400 - [{'error': {'code': 400, 'message': 'Unable to submit request because candidateCount must be 1 but
|
||||||
|
# the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
|
||||||
):
|
):
|
||||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
|
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue