chore: update the vertexai inference impl to use openai-python for openai-compat functions

Matthew Farrellee 2025-09-08 13:16:53 -04:00
parent ef02b9ea10
commit f9296d2d91
2 changed files with 7 additions and 2 deletions
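
For context, the OpenAI-compatible Vertex AI endpoint this change targets can be exercised directly with openai-python. A minimal sketch, assuming Application Default Credentials are configured; the project, location, and model id below are placeholders:

```python
import google.auth
import google.auth.transport.requests
from openai import OpenAI

# Placeholders -- substitute a real GCP project and region.
project, location = "my-gcp-project", "us-central1"

# Obtain an access token via Application Default Credentials (ADC).
creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
creds.refresh(google.auth.transport.requests.Request())

client = OpenAI(
    # Same endpoint shape that get_base_url() constructs in the diff below.
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/openapi",
    api_key=creds.token,  # Vertex accepts a Google access token as the bearer key
)

resp = client.chat.completions.create(
    model="google/gemini-2.0-flash-001",  # placeholder model id
    messages=[{"role": "user", "content": "Say hello"}],
)
print(resp.choices[0].message.content)
```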


@@ -218,7 +218,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="vertexai",
-            pip_packages=["litellm", "google-cloud-aiplatform"],
+            pip_packages=["litellm", "google-cloud-aiplatform", "openai"],
             module="llama_stack.providers.remote.inference.vertexai",
             config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig",
             provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator",


@@ -10,12 +10,13 @@ from llama_stack.apis.inference import ChatCompletionRequest
 from llama_stack.providers.utils.inference.litellm_openai_mixin import (
     LiteLLMOpenAIMixin,
 )
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import VertexAIConfig
 from .models import MODEL_ENTRIES
 
 
-class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
+class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     def __init__(self, config: VertexAIConfig) -> None:
         LiteLLMOpenAIMixin.__init__(
             self,
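
Because OpenAIMixin is listed before LiteLLMOpenAIMixin, Python's method resolution order picks the OpenAIMixin version of any method both mixins define, while everything else still falls through to LiteLLMOpenAIMixin. A toy illustration of that ordering (the method names here are stand-ins, not the real mixin APIs):

```python
# Toy classes only -- the method names are stand-ins, not the real mixin APIs.
class OpenAIMixin:
    def openai_chat_completion(self):
        return "handled by the openai-python path"


class LiteLLMOpenAIMixin:
    def openai_chat_completion(self):
        return "handled by litellm"

    def chat_completion(self):
        return "litellm-only method, still reachable"


class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
    pass


adapter = VertexAIInferenceAdapter()
print(adapter.openai_chat_completion())  # openai-python path wins
print(adapter.chat_completion())         # falls through to LiteLLMOpenAIMixin
print([cls.__name__ for cls in VertexAIInferenceAdapter.__mro__])
# ['VertexAIInferenceAdapter', 'OpenAIMixin', 'LiteLLMOpenAIMixin', 'object']
```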
@@ -31,6 +32,10 @@ class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
         # Return empty string to let litellm handle authentication via ADC
         return ""
 
+    def get_base_url(self):
+        # source - https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
+        return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
+
     async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
         # Get base parameters from parent
         params = await super()._get_params(request)
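
The new get_base_url() hook returns the region-scoped OpenAI-compatible endpoint described in the linked Google guide. A plausible reading of how an OpenAI-client mixin could consume the adapter's two hooks; the real OpenAIMixin is not shown in this diff, so its shape here is an assumption:

```python
# Hypothetical sketch only -- the real OpenAIMixin in llama_stack is not shown
# in this diff and may be implemented differently.
from openai import AsyncOpenAI


class OpenAIMixinSketch:
    def get_api_key(self) -> str:
        raise NotImplementedError  # provided by the adapter

    def get_base_url(self) -> str:
        raise NotImplementedError  # provided by the adapter

    @property
    def client(self) -> AsyncOpenAI:
        # Point openai-python at whatever OpenAI-compatible endpoint the
        # adapter's get_base_url() returns.
        return AsyncOpenAI(api_key=self.get_api_key(), base_url=self.get_base_url())

    async def openai_chat_completion(self, model: str, messages: list[dict], **kwargs):
        return await self.client.chat.completions.create(model=model, messages=messages, **kwargs)
```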