44 KiB
import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem';
VertexAI [Anthropic, Gemini, Model Garden]
🆕 vertex_ai_beta/
route
New vertex_ai_beta/
route. Adds support for system messages, tool_choice params, etc. by moving to httpx client (instead of vertex sdk). This implementation uses VertexAI's REST API.
from litellm import completion
import json
## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'
# Load the JSON file
with open(file_path, 'r') as file:
vertex_credentials = json.load(file)
# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)
## COMPLETION CALL
response = completion(
model="vertex_ai_beta/gemini-pro",
messages=[{ "content": "Hello, how are you?","role": "user"}],
vertex_credentials=vertex_credentials_json
)
System Message
from litellm import completion
import json
## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'
# Load the JSON file
with open(file_path, 'r') as file:
vertex_credentials = json.load(file)
# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)
response = completion(
model="vertex_ai_beta/gemini-pro",
messages=[{"content": "You are a good bot.","role": "system"}, {"content": "Hello, how are you?","role": "user"}],
vertex_credentials=vertex_credentials_json
)
Function Calling
Force Gemini to make tool calls with tool_choice="required"
.
from litellm import completion
import json
## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'
# Load the JSON file
with open(file_path, 'r') as file:
vertex_credentials = json.load(file)
# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)
messages = [
{
"role": "system",
"content": "Your name is Litellm Bot, you are a helpful assistant",
},
# User asks for their name and weather in San Francisco
{
"role": "user",
"content": "Hello, what is your name and can you tell me the weather?",
},
]
tools = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
}
},
"required": ["location"],
},
},
}
]
data = {
"model": "vertex_ai_beta/gemini-1.5-pro-preview-0514"),
"messages": messages,
"tools": tools,
"tool_choice": "required",
"vertex_credentials": vertex_credentials_json
}
## COMPLETION CALL
print(completion(**data))
JSON Schema
From v1.40.1+
LiteLLM supports sending response_schema
as a param for Gemini-1.5-Pro on Vertex AI. For other models (e.g. gemini-1.5-flash
or claude-3-5-sonnet
), LiteLLM adds the schema to the message list with a user-controlled prompt.
Response Schema
from litellm import completion
import json
## SETUP ENVIRONMENT
# !gcloud auth application-default login - run this to add vertex credentials to your env
messages = [
{
"role": "user",
"content": "List 5 popular cookie recipes."
}
]
response_schema = {
"type": "array",
"items": {
"type": "object",
"properties": {
"recipe_name": {
"type": "string",
},
},
"required": ["recipe_name"],
},
}
completion(
model="vertex_ai_beta/gemini-1.5-pro",
messages=messages,
response_format={"type": "json_object", "response_schema": response_schema} # 👈 KEY CHANGE
)
print(json.loads(completion.choices[0].message.content))
- Add model to config.yaml
model_list:
- model_name: gemini-pro
litellm_params:
model: vertex_ai_beta/gemini-1.5-pro
vertex_project: "project-id"
vertex_location: "us-central1"
vertex_credentials: "/path/to/service_account.json" # [OPTIONAL] Do this OR `!gcloud auth application-default login` - run this to add vertex credentials to your env
- Start Proxy
$ litellm --config /path/to/config.yaml
- Make Request!
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-D '{
"model": "gemini-pro",
"messages": [
{"role": "user", "content": "List 5 popular cookie recipes."}
],
"response_format": {"type": "json_object", "response_schema": {
"type": "array",
"items": {
"type": "object",
"properties": {
"recipe_name": {
"type": "string",
},
},
"required": ["recipe_name"],
},
}}
}
'
Validate Schema
To validate the response_schema, set enforce_validation: true
.
from litellm import completion, JSONSchemaValidationError
try:
completion(
model="vertex_ai_beta/gemini-1.5-pro",
messages=messages,
response_format={
"type": "json_object",
"response_schema": response_schema,
"enforce_validation": true # 👈 KEY CHANGE
}
)
except JSONSchemaValidationError as e:
print("Raw Response: {}".format(e.raw_response))
raise e
- Add model to config.yaml
model_list:
- model_name: gemini-pro
litellm_params:
model: vertex_ai_beta/gemini-1.5-pro
vertex_project: "project-id"
vertex_location: "us-central1"
vertex_credentials: "/path/to/service_account.json" # [OPTIONAL] Do this OR `!gcloud auth application-default login` - run this to add vertex credentials to your env
- Start Proxy
$ litellm --config /path/to/config.yaml
- Make Request!
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-D '{
"model": "gemini-pro",
"messages": [
{"role": "user", "content": "List 5 popular cookie recipes."}
],
"response_format": {"type": "json_object", "response_schema": {
"type": "array",
"items": {
"type": "object",
"properties": {
"recipe_name": {
"type": "string",
},
},
"required": ["recipe_name"],
},
},
"enforce_validation": true
}
}
'
LiteLLM will validate the response against the schema, and raise a JSONSchemaValidationError
if the response does not match the schema.
JSONSchemaValidationError inherits from openai.APIError
Access the raw response with e.raw_response
Add to prompt yourself
from litellm import completion
## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'
# Load the JSON file
with open(file_path, 'r') as file:
vertex_credentials = json.load(file)
# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)
messages = [
{
"role": "user",
"content": """
List 5 popular cookie recipes.
Using this JSON schema:
Recipe = {"recipe_name": str}
Return a `list[Recipe]`
"""
}
]
completion(model="vertex_ai_beta/gemini-1.5-flash-preview-0514", messages=messages, response_format={ "type": "json_object" })
Grounding
Add Google Search Result grounding to vertex ai calls.
See the grounding metadata with response_obj._hidden_params["vertex_ai_grounding_metadata"]
from litellm import completion
## SETUP ENVIRONMENT
# !gcloud auth application-default login - run this to add vertex credentials to your env
tools = [{"googleSearchRetrieval": {}}] # 👈 ADD GOOGLE SEARCH
resp = litellm.completion(
model="vertex_ai_beta/gemini-1.0-pro-001",
messages=[{"role": "user", "content": "Who won the world cup?"}],
tools=tools,
)
print(resp)
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-d '{
"model": "gemini-pro",
"messages": [{"role": "user", "content": "Who won the world cup?"}],
"tools": [
{
"googleSearchResults": {}
}
]
}'
Moving from Vertex AI SDK to LiteLLM (GROUNDING)
If this was your initial VertexAI Grounding code,
import vertexai
vertexai.init(project=project_id, location="us-central1")
model = GenerativeModel("gemini-1.5-flash-001")
# Use Google Search for grounding
tool = Tool.from_google_search_retrieval(grounding.GoogleSearchRetrieval(disable_attributon=False))
prompt = "When is the next total solar eclipse in US?"
response = model.generate_content(
prompt,
tools=[tool],
generation_config=GenerationConfig(
temperature=0.0,
),
)
print(response)
then, this is what it looks like now
from litellm import completion
# !gcloud auth application-default login - run this to add vertex credentials to your env
tools = [{"googleSearchRetrieval": {"disable_attributon": False}}] # 👈 ADD GOOGLE SEARCH
resp = litellm.completion(
model="vertex_ai_beta/gemini-1.0-pro-001",
messages=[{"role": "user", "content": "Who won the world cup?"}],
tools=tools,
vertex_project="project-id"
)
print(resp)
Context Caching
Use Vertex AI Context Caching
- Add model to config.yaml
model_list:
# used for /chat/completions, /completions, /embeddings endpoints
- model_name: gemini-1.5-pro-001
litellm_params:
model: vertex_ai_beta/gemini-1.5-pro-001
vertex_project: "project-id"
vertex_location: "us-central1"
vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
# used for the /cachedContent and vertexAI native endpoints
default_vertex_config:
vertex_project: "adroit-crow-413218"
vertex_location: "us-central1"
vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
- Start Proxy
$ litellm --config /path/to/config.yaml
- Make Request! We make the request in two steps:
- Create a cachedContents object
- Use the cachedContents object in your /chat/completions
Create a cachedContents object
First, create a cachedContents object by calling the Vertex cachedContents
endpoint. The LiteLLM proxy forwards the /cachedContents
request to the VertexAI API.
import httpx
# Set Litellm proxy variables
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"
httpx_client = httpx.Client(timeout=30)
print("Creating cached content")
create_cache = httpx_client.post(
url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
json={
"model": "gemini-1.5-pro-001",
"contents": [
{
"role": "user",
"parts": [{
"text": "This is sample text to demonstrate explicit caching." * 4000
}]
}
],
}
)
print("Response from create_cache:", create_cache)
create_cache_response = create_cache.json()
print("JSON from create_cache:", create_cache_response)
cached_content_name = create_cache_response["name"]
Use the cachedContents object in your /chat/completions request to VertexAI
import openai
# Set Litellm proxy variables
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"
client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
response = client.chat.completions.create(
model="gemini-1.5-pro-001",
max_tokens=8192,
messages=[
{
"role": "user",
"content": "What is the sample text about?",
},
],
temperature=0.7,
extra_body={"cached_content": cached_content_name}, # Use the cached content
)
print("Response from proxy:", response)
Pre-requisites
-
pip install google-cloud-aiplatform
(pre-installed on proxy docker image) -
Authentication:
- run
gcloud auth application-default login
See Google Cloud Docs - Alternatively you can set
GOOGLE_APPLICATION_CREDENTIALS
Here's how: Jump to Code
- Create a service account on GCP - Export the credentials as a json - load the json and json.dump the json as a string - store the json string in your environment as `GOOGLE_APPLICATION_CREDENTIALS`
- run
Sample Usage
import litellm
litellm.vertex_project = "hardy-device-38811" # Your Project ID
litellm.vertex_location = "us-central1" # proj location
response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}])
Usage with LiteLLM Proxy Server
Here's how to use Vertex AI with the LiteLLM Proxy Server
- Modify the config.yaml
Use this when you need to set a different location for each vertex model
model_list:
- model_name: gemini-vision
litellm_params:
model: vertex_ai/gemini-1.0-pro-vision-001
vertex_project: "project-id"
vertex_location: "us-central1"
- model_name: gemini-vision
litellm_params:
model: vertex_ai/gemini-1.0-pro-vision-001
vertex_project: "project-id2"
vertex_location: "us-east"
Use this when you have one vertex location for all models
litellm_settings:
vertex_project: "hardy-device-38811" # Your Project ID
vertex_location: "us-central1" # proj location
model_list:
-model_name: team1-gemini-pro
litellm_params:
model: gemini-pro
- Start the proxy
$ litellm --config /path/to/config.yaml
- Send Request to LiteLLM Proxy Server
import openai
client = openai.OpenAI(
api_key="sk-1234", # pass litellm proxy key, if you're using virtual keys
base_url="http://0.0.0.0:4000" # litellm-proxy-base url
)
response = client.chat.completions.create(
model="team1-gemini-pro",
messages = [
{
"role": "user",
"content": "what llm are you"
}
],
)
print(response)
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"model": "team1-gemini-pro",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
}'
Specifying Safety Settings
In certain use-cases you may need to make calls to the models and pass safety settigns different from the defaults. To do so, simple pass the safety_settings
argument to completion
or acompletion
. For example:
response = completion(
model="vertex_ai/gemini-pro",
messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}]
safety_settings=[
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_NONE",
},
]
)
Option 1: Set in config
model_list:
- model_name: gemini-experimental
litellm_params:
model: vertex_ai/gemini-experimental
vertex_project: litellm-epic
vertex_location: us-central1
safety_settings:
- category: HARM_CATEGORY_HARASSMENT
threshold: BLOCK_NONE
- category: HARM_CATEGORY_HATE_SPEECH
threshold: BLOCK_NONE
- category: HARM_CATEGORY_SEXUALLY_EXPLICIT
threshold: BLOCK_NONE
- category: HARM_CATEGORY_DANGEROUS_CONTENT
threshold: BLOCK_NONE
Option 2: Set on call
response = client.chat.completions.create(
model="gemini-experimental",
messages=[
{
"role": "user",
"content": "Can you write exploits?",
}
],
max_tokens=8192,
stream=False,
temperature=0.0,
extra_body={
"safety_settings": [
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_NONE",
},
],
}
)
Set Vertex Project & Vertex Location
All calls using Vertex AI require the following parameters:
- Your Project ID
import os, litellm
# set via env var
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811" # Your Project ID`
### OR ###
# set directly on module
litellm.vertex_project = "hardy-device-38811" # Your Project ID`
- Your Project Location
import os, litellm
# set via env var
os.environ["VERTEXAI_LOCATION"] = "us-central1 # Your Location
### OR ###
# set directly on module
litellm.vertex_location = "us-central1 # Your Location
Anthropic
Model Name | Function Call |
---|---|
claude-3-opus@20240229 | completion('vertex_ai/claude-3-opus@20240229', messages) |
claude-3-5-sonnet@20240620 | completion('vertex_ai/claude-3-5-sonnet@20240620', messages) |
claude-3-sonnet@20240229 | completion('vertex_ai/claude-3-sonnet@20240229', messages) |
claude-3-haiku@20240307 | completion('vertex_ai/claude-3-haiku@20240307', messages) |
Usage
from litellm import completion
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""
model = "claude-3-sonnet@20240229"
vertex_ai_project = "your-vertex-project" # can also set this as os.environ["VERTEXAI_PROJECT"]
vertex_ai_location = "your-vertex-location" # can also set this as os.environ["VERTEXAI_LOCATION"]
response = completion(
model="vertex_ai/" + model,
messages=[{"role": "user", "content": "hi"}],
temperature=0.7,
vertex_ai_project=vertex_ai_project,
vertex_ai_location=vertex_ai_location,
)
print("\nModel Response", response)
1. Add to config
model_list:
- model_name: anthropic-vertex
litellm_params:
model: vertex_ai/claude-3-sonnet@20240229
vertex_ai_project: "my-test-project"
vertex_ai_location: "us-east-1"
- model_name: anthropic-vertex
litellm_params:
model: vertex_ai/claude-3-sonnet@20240229
vertex_ai_project: "my-test-project"
vertex_ai_location: "us-west-1"
2. Start proxy
litellm --config /path/to/config.yaml
# RUNNING at http://0.0.0.0:4000
3. Test it!
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic-vertex", # 👈 the 'model_name' in config
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
}'
Llama 3 API
Model Name | Function Call |
---|---|
meta/llama3-405b-instruct-maas | completion('vertex_ai/meta/llama3-405b-instruct-maas', messages) |
Usage
from litellm import completion
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""
model = "meta/llama3-405b-instruct-maas"
vertex_ai_project = "your-vertex-project" # can also set this as os.environ["VERTEXAI_PROJECT"]
vertex_ai_location = "your-vertex-location" # can also set this as os.environ["VERTEXAI_LOCATION"]
response = completion(
model="vertex_ai/" + model,
messages=[{"role": "user", "content": "hi"}],
vertex_ai_project=vertex_ai_project,
vertex_ai_location=vertex_ai_location,
)
print("\nModel Response", response)
1. Add to config
model_list:
- model_name: anthropic-llama
litellm_params:
model: vertex_ai/meta/llama3-405b-instruct-maas
vertex_ai_project: "my-test-project"
vertex_ai_location: "us-east-1"
- model_name: anthropic-llama
litellm_params:
model: vertex_ai/meta/llama3-405b-instruct-maas
vertex_ai_project: "my-test-project"
vertex_ai_location: "us-west-1"
2. Start proxy
litellm --config /path/to/config.yaml
# RUNNING at http://0.0.0.0:4000
3. Test it!
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic-llama", # 👈 the 'model_name' in config
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
}'
Mistral API
Model Name | Function Call |
---|---|
mistral-large@latest | completion('vertex_ai/mistral-large@latest', messages) |
mistral-large@2407 | completion('vertex_ai/mistral-large@2407', messages) |
mistral-nemo@latest | completion('vertex_ai/mistral-nemo@latest', messages) |
codestral@latest | completion('vertex_ai/codestral@latest', messages) |
codestral@@2405 | completion('vertex_ai/codestral@2405', messages) |
Usage
from litellm import completion
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""
model = "mistral-large@2407"
vertex_ai_project = "your-vertex-project" # can also set this as os.environ["VERTEXAI_PROJECT"]
vertex_ai_location = "your-vertex-location" # can also set this as os.environ["VERTEXAI_LOCATION"]
response = completion(
model="vertex_ai/" + model,
messages=[{"role": "user", "content": "hi"}],
vertex_ai_project=vertex_ai_project,
vertex_ai_location=vertex_ai_location,
)
print("\nModel Response", response)
1. Add to config
model_list:
- model_name: vertex-mistral
litellm_params:
model: vertex_ai/mistral-large@2407
vertex_ai_project: "my-test-project"
vertex_ai_location: "us-east-1"
- model_name: vertex-mistral
litellm_params:
model: vertex_ai/mistral-large@2407
vertex_ai_project: "my-test-project"
vertex_ai_location: "us-west-1"
2. Start proxy
litellm --config /path/to/config.yaml
# RUNNING at http://0.0.0.0:4000
3. Test it!
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"model": "vertex-mistral", # 👈 the 'model_name' in config
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
}'
Usage - Codestral FIM
Call Codestral on VertexAI via the OpenAI /v1/completion
endpoint for FIM tasks.
Note: You can also call Codestral via /chat/completion
.
from litellm import completion
import os
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""
# OR run `!gcloud auth print-access-token` in your terminal
model = "codestral@2405"
vertex_ai_project = "your-vertex-project" # can also set this as os.environ["VERTEXAI_PROJECT"]
vertex_ai_location = "your-vertex-location" # can also set this as os.environ["VERTEXAI_LOCATION"]
response = text_completion(
model="vertex_ai/" + model,
vertex_ai_project=vertex_ai_project,
vertex_ai_location=vertex_ai_location,
prompt="def is_odd(n): \n return n % 2 == 1 \ndef test_is_odd():",
suffix="return True", # optional
temperature=0, # optional
top_p=1, # optional
max_tokens=10, # optional
min_tokens=10, # optional
seed=10, # optional
stop=["return"], # optional
)
print("\nModel Response", response)
1. Add to config
model_list:
- model_name: vertex-codestral
litellm_params:
model: vertex_ai/codestral@2405
vertex_ai_project: "my-test-project"
vertex_ai_location: "us-east-1"
- model_name: vertex-codestral
litellm_params:
model: vertex_ai/codestral@2405
vertex_ai_project: "my-test-project"
vertex_ai_location: "us-west-1"
2. Start proxy
litellm --config /path/to/config.yaml
# RUNNING at http://0.0.0.0:4000
3. Test it!
curl -X POST 'http://0.0.0.0:4000/completions' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
"model": "vertex-codestral", # 👈 the 'model_name' in config
"prompt": "def is_odd(n): \n return n % 2 == 1 \ndef test_is_odd():",
"suffix":"return True", # optional
"temperature":0, # optional
"top_p":1, # optional
"max_tokens":10, # optional
"min_tokens":10, # optional
"seed":10, # optional
"stop":["return"], # optional
}'
Model Garden
Model Name | Function Call |
---|---|
llama2 | completion('vertex_ai/<endpoint_id>', messages) |
Using Model Garden
from litellm import completion
import os
## set ENV variables
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
os.environ["VERTEXAI_LOCATION"] = "us-central1"
response = completion(
model="vertex_ai/<your-endpoint-id>",
messages=[{ "content": "Hello, how are you?","role": "user"}]
)
Gemini Pro
Model Name | Function Call |
---|---|
gemini-pro | completion('gemini-pro', messages) , completion('vertex_ai/gemini-pro', messages) |
Gemini Pro Vision
Model Name | Function Call |
---|---|
gemini-pro-vision | completion('gemini-pro-vision', messages) , completion('vertex_ai/gemini-pro-vision', messages) |
Gemini 1.5 Pro (and Vision)
Model Name | Function Call |
---|---|
gemini-1.5-pro | completion('gemini-1.5-pro', messages) , completion('vertex_ai/gemini-1.5-pro', messages) |
gemini-1.5-flash-preview-0514 | completion('gemini-1.5-flash-preview-0514', messages) , completion('vertex_ai/gemini-1.5-flash-preview-0514', messages) |
gemini-1.5-pro-preview-0514 | completion('gemini-1.5-pro-preview-0514', messages) , completion('vertex_ai/gemini-1.5-pro-preview-0514', messages) |
Using Gemini Pro Vision
Call gemini-pro-vision
in the same input/output format as OpenAI gpt-4-vision
LiteLLM Supports the following image types passed in url
- Images with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
- Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
- Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4
- Base64 Encoded Local Images
Example Request - image url
import litellm
response = litellm.completion(
model = "vertex_ai/gemini-pro-vision",
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "Whats in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
}
}
]
}
],
)
print(response)
import litellm
def encode_image(image_path):
import base64
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")
image_path = "cached_logo.jpg"
# Getting the base64 string
base64_image = encode_image(image_path)
response = litellm.completion(
model="vertex_ai/gemini-pro-vision",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64," + base64_image
},
},
],
}
],
)
print(response)
Usage - Function Calling
LiteLLM supports Function Calling for Vertex AI gemini models.
from litellm import completion
import os
# set env
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ".."
os.environ["VERTEX_AI_PROJECT"] = ".."
os.environ["VERTEX_AI_LOCATION"] = ".."
tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
response = completion(
model="vertex_ai/gemini-pro-vision",
messages=messages,
tools=tools,
)
# Add any assertions, here to check response args
print(response)
assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)
Usage - PDF / Videos / etc. Files
Pass any file supported by Vertex AI, through LiteLLM.
Using gs://
from litellm import completion
response = completion(
model="vertex_ai/gemini-1.5-flash",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{
"type": "image_url",
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf", # 👈 PDF
},
],
}
],
max_tokens=300,
)
print(response.choices[0])
using base64
from litellm import completion
import base64
import requests
# URL of the file
url = "https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf"
# Download the file
response = requests.get(url)
file_data = response.content
encoded_file = base64.b64encode(file_data).decode("utf-8")
response = completion(
model="vertex_ai/gemini-1.5-flash",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{
"type": "image_url",
"image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
},
],
}
],
max_tokens=300,
)
print(response.choices[0])
- Add model to config
- model_name: gemini-1.5-flash
litellm_params:
model: vertex_ai/gemini-1.5-flash
vertex_credentials: "/path/to/service_account.json"
- Start Proxy
litellm --config /path/to/config.yaml
- Test it!
Using gs://
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
-d '{
"model": "gemini-1.5-flash",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "You are a very professional document summarization specialist. Please summarize the given document"
},
{
"type": "image_url",
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf" # 👈 PDF
}
}
]
}
],
"max_tokens": 300
}'
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
-d '{
"model": "gemini-1.5-flash",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "You are a very professional document summarization specialist. Please summarize the given document"
},
{
"type": "image_url",
"image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF
}
}
]
}
],
"max_tokens": 300
}'
Chat Models
Model Name | Function Call |
---|---|
chat-bison-32k | completion('chat-bison-32k', messages) |
chat-bison | completion('chat-bison', messages) |
chat-bison@001 | completion('chat-bison@001', messages) |
Code Chat Models
Model Name | Function Call |
---|---|
codechat-bison | completion('codechat-bison', messages) |
codechat-bison-32k | completion('codechat-bison-32k', messages) |
codechat-bison@001 | completion('codechat-bison@001', messages) |
Text Models
Model Name | Function Call |
---|---|
text-bison | completion('text-bison', messages) |
text-bison@001 | completion('text-bison@001', messages) |
Code Text Models
Model Name | Function Call |
---|---|
code-bison | completion('code-bison', messages) |
code-bison@001 | completion('code-bison@001', messages) |
code-gecko@001 | completion('code-gecko@001', messages) |
code-gecko@latest | completion('code-gecko@latest', messages) |
Embedding Models
Usage - Embedding
import litellm
from litellm import embedding
litellm.vertex_project = "hardy-device-38811" # Your Project ID
litellm.vertex_location = "us-central1" # proj location
response = embedding(
model="vertex_ai/textembedding-gecko",
input=["good morning from litellm"],
)
print(response)
Supported Embedding Models
All models listed here are supported
Model Name | Function Call |
---|---|
text-embedding-004 | embedding(model="vertex_ai/text-embedding-004", input) |
text-multilingual-embedding-002 | embedding(model="vertex_ai/text-multilingual-embedding-002", input) |
textembedding-gecko | embedding(model="vertex_ai/textembedding-gecko", input) |
textembedding-gecko-multilingual | embedding(model="vertex_ai/textembedding-gecko-multilingual", input) |
textembedding-gecko-multilingual@001 | embedding(model="vertex_ai/textembedding-gecko-multilingual@001", input) |
textembedding-gecko@001 | embedding(model="vertex_ai/textembedding-gecko@001", input) |
textembedding-gecko@003 | embedding(model="vertex_ai/textembedding-gecko@003", input) |
text-embedding-preview-0409 | embedding(model="vertex_ai/text-embedding-preview-0409", input) |
text-multilingual-embedding-preview-0409 | embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input) |
Advanced Use task_type
and title
(Vertex Specific Params)
👉 task_type
and title
are vertex specific params
LiteLLM Supported Vertex Specific Params
auto_truncate: Optional[bool] = None
task_type: Optional[Literal["RETRIEVAL_QUERY","RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING", "QUESTION_ANSWERING", "FACT_VERIFICATION"]] = None
title: Optional[str] = None # The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
Example Usage with LiteLLM
response = litellm.embedding(
model="vertex_ai/text-embedding-004",
input=["good morning from litellm", "gm"]
task_type = "RETRIEVAL_DOCUMENT",
dimensions=1,
auto_truncate=True,
)
Image Generation Models
Usage
response = await litellm.aimage_generation(
prompt="An olympic size swimming pool",
model="vertex_ai/imagegeneration@006",
vertex_ai_project="adroit-crow-413218",
vertex_ai_location="us-central1",
)
Generating multiple images
Use the n
parameter to pass how many images you want generated
response = await litellm.aimage_generation(
prompt="An olympic size swimming pool",
model="vertex_ai/imagegeneration@006",
vertex_ai_project="adroit-crow-413218",
vertex_ai_location="us-central1",
n=1,
)
Extra
Using GOOGLE_APPLICATION_CREDENTIALS
Here's the code for storing your service account credentials as GOOGLE_APPLICATION_CREDENTIALS
environment variable:
def load_vertex_ai_credentials():
# Define the path to the vertex_key.json file
print("loading vertex ai credentials")
filepath = os.path.dirname(os.path.abspath(__file__))
vertex_key_path = filepath + "/vertex_key.json"
# Read the existing content of the file or create an empty dictionary
try:
with open(vertex_key_path, "r") as file:
# Read the file content
print("Read vertexai file path")
content = file.read()
# If the file is empty or not valid JSON, create an empty dictionary
if not content or not content.strip():
service_account_key_data = {}
else:
# Attempt to load the existing JSON content
file.seek(0)
service_account_key_data = json.load(file)
except FileNotFoundError:
# If the file doesn't exist, create an empty dictionary
service_account_key_data = {}
# Create a temporary file
with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
# Write the updated content to the temporary file
json.dump(service_account_key_data, temp_file, indent=2)
# Export the temporary file as GOOGLE_APPLICATION_CREDENTIALS
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.abspath(temp_file.name)
Using GCP Service Account
:::info
Trying to deploy LiteLLM on Google Cloud Run? Tutorial here
:::
- Figure out the Service Account bound to the Google Cloud Run service
<Image img={require('../../img/gcp_acc_1.png')} />
-
Get the FULL EMAIL address of the corresponding Service Account
-
Next, go to IAM & Admin > Manage Resources , select your top-level project that houses your Google Cloud Run Service
Click Add Principal
<Image img={require('../../img/gcp_acc_2.png')}/>
- Specify the Service Account as the principal and Vertex AI User as the role
<Image img={require('../../img/gcp_acc_3.png')}/>
Once that's done, when you deploy the new container in the Google Cloud Run service, LiteLLM will have automatic access to all Vertex AI endpoints.
s/o @Darien Kindlund for this tutorial