Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-07-27 06:28:50 +00:00
feat: Add Google Vertex AI inference provider support
- Add new Vertex AI remote inference provider with litellm integration
- Support for Gemini models through Google Cloud Vertex AI platform
- Uses Google Cloud Application Default Credentials (ADC) for authentication
- Added Vertex AI models: gemini-2.5-flash, gemini-2.5-pro, gemini-2.0-flash
- Updated provider registry to include vertexai provider
- Updated starter template to support Vertex AI configuration
- Added comprehensive documentation and sample configuration

Signed-off-by: Eran Cohen <eranco@redhat.com>
This commit is contained in:
parent
c0563c0560
commit
1f421238b8
12 changed files with 311 additions and 0 deletions
|
@ -18,6 +18,7 @@ distribution_spec:
|
|||
- remote::openai
|
||||
- remote::anthropic
|
||||
- remote::gemini
|
||||
- remote::vertexai
|
||||
- remote::groq
|
||||
- remote::llama-openai-compat
|
||||
- remote::sambanova
|
||||
|
|
|
@ -85,6 +85,11 @@ providers:
|
|||
provider_type: remote::gemini
|
||||
config:
|
||||
api_key: ${env.GEMINI_API_KEY}
|
||||
- provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_type: remote::vertexai
|
||||
config:
|
||||
project: ${env.VERTEX_AI_PROJECT}
|
||||
location: ${env.VERTEX_AI_LOCATION:=us-central1}
|
||||
- provider_id: ${env.ENABLE_GROQ:=__disabled__}
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
|
@ -963,6 +968,35 @@ models:
|
|||
provider_id: ${env.ENABLE_GEMINI:=__disabled__}
|
||||
provider_model_id: gemini/text-embedding-004
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: ${env.ENABLE_VERTEXAI:=__disabled__}/vertex_ai/gemini-2.0-flash
|
||||
provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_model_id: vertex_ai/gemini-2.0-flash
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.ENABLE_VERTEXAI:=__disabled__}/vertex_ai/gemini-2.5-flash
|
||||
provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_model_id: vertex_ai/gemini-2.5-flash
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.ENABLE_VERTEXAI:=__disabled__}/vertex_ai/gemini-2.5-pro
|
||||
provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_model_id: vertex_ai/gemini-2.5-pro
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 2048
|
||||
model_id: ${env.ENABLE_VERTEXAI:=__disabled__}/vertex_ai/text-embedding-004
|
||||
provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_model_id: vertex_ai/text-embedding-004
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 2048
|
||||
model_id: ${env.ENABLE_VERTEXAI:=__disabled__}/vertex_ai/text-embedding-005
|
||||
provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_model_id: vertex_ai/text-embedding-005
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama3-8b-8192
|
||||
provider_id: ${env.ENABLE_GROQ:=__disabled__}
|
||||
|
|
|
@ -18,6 +18,7 @@ distribution_spec:
|
|||
- remote::openai
|
||||
- remote::anthropic
|
||||
- remote::gemini
|
||||
- remote::vertexai
|
||||
- remote::groq
|
||||
- remote::llama-openai-compat
|
||||
- remote::sambanova
|
||||
|
|
|
@ -85,6 +85,11 @@ providers:
|
|||
provider_type: remote::gemini
|
||||
config:
|
||||
api_key: ${env.GEMINI_API_KEY}
|
||||
- provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_type: remote::vertexai
|
||||
config:
|
||||
project: ${env.VERTEX_AI_PROJECT}
|
||||
location: ${env.VERTEX_AI_LOCATION:=us-central1}
|
||||
- provider_id: ${env.ENABLE_GROQ:=__disabled__}
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
|
@ -963,6 +968,35 @@ models:
|
|||
provider_id: ${env.ENABLE_GEMINI:=__disabled__}
|
||||
provider_model_id: gemini/text-embedding-004
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: ${env.ENABLE_VERTEXAI:=__disabled__}/vertex_ai/gemini-2.0-flash
|
||||
provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_model_id: vertex_ai/gemini-2.0-flash
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.ENABLE_VERTEXAI:=__disabled__}/vertex_ai/gemini-2.5-flash
|
||||
provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_model_id: vertex_ai/gemini-2.5-flash
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.ENABLE_VERTEXAI:=__disabled__}/vertex_ai/gemini-2.5-pro
|
||||
provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_model_id: vertex_ai/gemini-2.5-pro
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 2048
|
||||
model_id: ${env.ENABLE_VERTEXAI:=__disabled__}/vertex_ai/text-embedding-004
|
||||
provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_model_id: vertex_ai/text-embedding-004
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 2048
|
||||
model_id: ${env.ENABLE_VERTEXAI:=__disabled__}/vertex_ai/text-embedding-005
|
||||
provider_id: ${env.ENABLE_VERTEXAI:=__disabled__}
|
||||
provider_model_id: vertex_ai/text-embedding-005
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama3-8b-8192
|
||||
provider_id: ${env.ENABLE_GROQ:=__disabled__}
|
||||
|
|
|
@ -64,6 +64,9 @@ from llama_stack.providers.remote.inference.sambanova.models import (
|
|||
from llama_stack.providers.remote.inference.together.models import (
|
||||
MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.vertexai.models import (
|
||||
MODEL_ENTRIES as VERTEXAI_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||
from llama_stack.providers.remote.vector_io.pgvector.config import (
|
||||
PGVectorVectorIOConfig,
|
||||
|
@ -93,6 +96,7 @@ def _get_model_entries_for_provider(provider_type: str) -> list[ProviderModelEnt
|
|||
"databricks": DATABRICKS_MODEL_ENTRIES,
|
||||
"nvidia": NVIDIA_MODEL_ENTRIES,
|
||||
"runpod": RUNPOD_MODEL_ENTRIES,
|
||||
"vertexai": VERTEXAI_MODEL_ENTRIES,
|
||||
}
|
||||
|
||||
# Special handling for providers with dynamic model entries
|
||||
|
@ -354,6 +358,14 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"",
|
||||
"Gemini API Key",
|
||||
),
|
||||
"VERTEX_AI_PROJECT": (
|
||||
"",
|
||||
"Google Cloud Project ID for Vertex AI",
|
||||
),
|
||||
"VERTEX_AI_LOCATION": (
|
||||
"us-central1",
|
||||
"Google Cloud Location for Vertex AI",
|
||||
),
|
||||
"SAMBANOVA_API_KEY": (
|
||||
"",
|
||||
"SambaNova API Key",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue