diff --git a/docs/my-website/docs/pass_through/vertex_ai.md b/docs/my-website/docs/pass_through/vertex_ai.md
index 744c5e3ff..601f89f4b 100644
--- a/docs/my-website/docs/pass_through/vertex_ai.md
+++ b/docs/my-website/docs/pass_through/vertex_ai.md
@@ -69,6 +69,44 @@ generateContent();
+## Quick Start
+
+Let's call the Vertex AI [`/generateContent` endpoint](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference)
+
+1. Add Vertex AI Credentials to your environment
+
+```bash
+export DEFAULT_VERTEXAI_PROJECT="" # "adroit-crow-413218"
+export DEFAULT_VERTEXAI_LOCATION="" # "us-central1"
+export DEFAULT_GOOGLE_APPLICATION_CREDENTIALS="" # "/Users/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+```
+
+2. Start LiteLLM Proxy
+
+```bash
+litellm
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+3. Test it!
+
+Let's call the Google AI Studio token counting endpoint
+
+```bash
+curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.0-pro:generateContent \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-1234" \
+ -d '{
+ "contents":[{
+ "role": "user",
+ "parts":[{"text": "How are you doing today?"}]
+ }]
+ }'
+```
+
+
+
## Supported API Endpoints
- Gemini API
@@ -87,206 +125,12 @@ LiteLLM Proxy Server supports two methods of authentication to Vertex AI:
2. Set Vertex AI credentials on proxy server
-## Quick Start Usage
-
-
-
-
-
-#### 1. Start litellm proxy
-
-```shell
-litellm --config /path/to/config.yaml
-```
-
-#### 2. Test it
-
-```python
-import vertexai
-from vertexai.preview.generative_models import GenerativeModel
-
-LITE_LLM_ENDPOINT = "http://localhost:4000"
-
-vertexai.init(
- project="", # enter your project id
- location="", # enter your region
- api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai", # route on litellm
- api_transport="rest",
-)
-
-model = GenerativeModel(model_name="gemini-1.0-pro")
-model.generate_content("hi")
-
-```
-
-
-
-
-
-
-#### 1. Set `default_vertex_config` on your `config.yaml`
-
-
-Add the following credentials to your litellm config.yaml to use the Vertex AI endpoints.
-
-```yaml
-default_vertex_config:
- vertex_project: "adroit-crow-413218"
- vertex_location: "us-central1"
- vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
-```
-
-#### 2. Start litellm proxy
-
-```shell
-litellm --config /path/to/config.yaml
-```
-
-#### 3. Test it
-
-```python
-import vertexai
-from google.auth.credentials import Credentials
-from vertexai.generative_models import GenerativeModel
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-import datetime
-
-
-class CredentialsWrapper(Credentials):
- def __init__(self, token=None):
- super().__init__()
- self.token = token
- self.expiry = None # or set to a future date if needed
-
- def refresh(self, request):
- pass
-
- def apply(self, headers, token=None):
- headers["Authorization"] = f"Bearer {self.token}"
-
- @property
- def expired(self):
- return False # Always consider the token as non-expired
-
- @property
- def valid(self):
- return True # Always consider the credentials as valid
-
-
-credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY)
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- credentials=credentials,
- api_transport="rest",
-)
-
-model = GenerativeModel("gemini-1.5-flash-001")
-
-response = model.generate_content(
- "What's a good name for a flower shop that specializes in selling bouquets of dried flowers?"
-)
-
-print(response.text)
-```
-
-
-
-
## Usage Examples
### Gemini API (Generate Content)
-
-
-```python
-import vertexai
-from vertexai.generative_models import GenerativeModel
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- api_transport="rest",
-
-)
-
-model = GenerativeModel("gemini-1.5-flash-001")
-
-response = model.generate_content(
- "What's a good name for a flower shop that specializes in selling bouquets of dried flowers?"
-)
-
-print(response.text)
-```
-
-
-
-
-```python
-import vertexai
-from google.auth.credentials import Credentials
-from vertexai.generative_models import GenerativeModel
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-import datetime
-
-
-class CredentialsWrapper(Credentials):
- def __init__(self, token=None):
- super().__init__()
- self.token = token
- self.expiry = None # or set to a future date if needed
-
- def refresh(self, request):
- pass
-
- def apply(self, headers, token=None):
- headers["Authorization"] = f"Bearer {self.token}"
-
- @property
- def expired(self):
- return False # Always consider the token as non-expired
-
- @property
- def valid(self):
- return True # Always consider the credentials as valid
-
-
-credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY)
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- credentials=credentials,
- api_transport="rest",
-
-)
-
-model = GenerativeModel("gemini-1.5-flash-001")
-
-response = model.generate_content(
- "What's a good name for a flower shop that specializes in selling bouquets of dried flowers?"
-)
-
-print(response.text)
-```
-
-
-
```shell
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:generateContent \
@@ -295,114 +139,10 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-0
-d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
```
-
-
### Embeddings API
-
-
-
-
-```python
-from typing import List, Optional
-from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel
-import vertexai
-from vertexai.generative_models import GenerativeModel
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-import datetime
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- api_transport="rest",
-)
-
-
-def embed_text(
- texts: List[str] = ["banana muffins? ", "banana bread? banana muffins?"],
- task: str = "RETRIEVAL_DOCUMENT",
- model_name: str = "text-embedding-004",
- dimensionality: Optional[int] = 256,
-) -> List[List[float]]:
- """Embeds texts with a pre-trained, foundational model."""
- model = TextEmbeddingModel.from_pretrained(model_name)
- inputs = [TextEmbeddingInput(text, task) for text in texts]
- kwargs = dict(output_dimensionality=dimensionality) if dimensionality else {}
- embeddings = model.get_embeddings(inputs, **kwargs)
- return [embedding.values for embedding in embeddings]
-```
-
-
-
-
-
-```python
-from typing import List, Optional
-from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel
-import vertexai
-from google.auth.credentials import Credentials
-from vertexai.generative_models import GenerativeModel
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-import datetime
-
-
-class CredentialsWrapper(Credentials):
- def __init__(self, token=None):
- super().__init__()
- self.token = token
- self.expiry = None # or set to a future date if needed
-
- def refresh(self, request):
- pass
-
- def apply(self, headers, token=None):
- headers["Authorization"] = f"Bearer {self.token}"
-
- @property
- def expired(self):
- return False # Always consider the token as non-expired
-
- @property
- def valid(self):
- return True # Always consider the credentials as valid
-
-
-credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY)
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- credentials=credentials,
- api_transport="rest",
-)
-
-
-def embed_text(
- texts: List[str] = ["banana muffins? ", "banana bread? banana muffins?"],
- task: str = "RETRIEVAL_DOCUMENT",
- model_name: str = "text-embedding-004",
- dimensionality: Optional[int] = 256,
-) -> List[List[float]]:
- """Embeds texts with a pre-trained, foundational model."""
- model = TextEmbeddingModel.from_pretrained(model_name)
- inputs = [TextEmbeddingInput(text, task) for text in texts]
- kwargs = dict(output_dimensionality=dimensionality) if dimensionality else {}
- embeddings = model.get_embeddings(inputs, **kwargs)
- return [embedding.values for embedding in embeddings]
-```
-
-
-
```shell
curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-gecko@001:predict \
@@ -411,133 +151,9 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-geck
-d '{"instances":[{"content": "gm"}]}'
```
-
-
-
### Imagen API
-
-
-
-
-
-```python
-from typing import List, Optional
-from vertexai.preview.vision_models import ImageGenerationModel
-import vertexai
-from google.auth.credentials import Credentials
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-import datetime
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- api_transport="rest",
-)
-
-model = ImageGenerationModel.from_pretrained("imagen-3.0-generate-001")
-
-images = model.generate_images(
- prompt=prompt,
- # Optional parameters
- number_of_images=1,
- language="en",
- # You can't use a seed value and watermark at the same time.
- # add_watermark=False,
- # seed=100,
- aspect_ratio="1:1",
- safety_filter_level="block_some",
- person_generation="allow_adult",
-)
-
-images[0].save(location=output_file, include_generation_parameters=False)
-
-# Optional. View the generated image in a notebook.
-# images[0].show()
-
-print(f"Created output image using {len(images[0]._image_bytes)} bytes")
-
-```
-
-
-
-
-```python
-from typing import List, Optional
-from vertexai.preview.vision_models import ImageGenerationModel
-import vertexai
-from google.auth.credentials import Credentials
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-import datetime
-
-
-class CredentialsWrapper(Credentials):
- def __init__(self, token=None):
- super().__init__()
- self.token = token
- self.expiry = None # or set to a future date if needed
-
- def refresh(self, request):
- pass
-
- def apply(self, headers, token=None):
- headers["Authorization"] = f"Bearer {self.token}"
-
- @property
- def expired(self):
- return False # Always consider the token as non-expired
-
- @property
- def valid(self):
- return True # Always consider the credentials as valid
-
-
-credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY)
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- credentials=credentials,
- api_transport="rest",
-)
-
-model = ImageGenerationModel.from_pretrained("imagen-3.0-generate-001")
-
-images = model.generate_images(
- prompt=prompt,
- # Optional parameters
- number_of_images=1,
- language="en",
- # You can't use a seed value and watermark at the same time.
- # add_watermark=False,
- # seed=100,
- aspect_ratio="1:1",
- safety_filter_level="block_some",
- person_generation="allow_adult",
-)
-
-images[0].save(location=output_file, include_generation_parameters=False)
-
-# Optional. View the generated image in a notebook.
-# images[0].show()
-
-print(f"Created output image using {len(images[0]._image_bytes)} bytes")
-
-```
-
-
-
-
-
```shell
curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generate-001:predict \
-H "Content-Type: application/json" \
@@ -545,252 +161,19 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generat
-d '{"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}'
```
-
-
-
### Count Tokens API
-
-
-
-
-
-
-```python
-from typing import List, Optional
-from vertexai.generative_models import GenerativeModel
-import vertexai
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-import datetime
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- api_transport="rest",
-)
-
-
-model = GenerativeModel("gemini-1.5-flash-001")
-
-prompt = "Why is the sky blue?"
-
-# Prompt tokens count
-response = model.count_tokens(prompt)
-print(f"Prompt Token Count: {response.total_tokens}")
-print(f"Prompt Character Count: {response.total_billable_characters}")
-
-# Send text to Gemini
-response = model.generate_content(prompt)
-
-# Response tokens count
-usage_metadata = response.usage_metadata
-print(f"Prompt Token Count: {usage_metadata.prompt_token_count}")
-print(f"Candidates Token Count: {usage_metadata.candidates_token_count}")
-print(f"Total Token Count: {usage_metadata.total_token_count}")
-```
-
-
-
-
-
-
-```python
-from typing import List, Optional
-from vertexai.generative_models import GenerativeModel
-import vertexai
-from google.auth.credentials import Credentials
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-import datetime
-
-
-class CredentialsWrapper(Credentials):
- def __init__(self, token=None):
- super().__init__()
- self.token = token
- self.expiry = None # or set to a future date if needed
-
- def refresh(self, request):
- pass
-
- def apply(self, headers, token=None):
- headers["Authorization"] = f"Bearer {self.token}"
-
- @property
- def expired(self):
- return False # Always consider the token as non-expired
-
- @property
- def valid(self):
- return True # Always consider the credentials as valid
-
-
-credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY)
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- credentials=credentials,
- api_transport="rest",
-)
-
-
-model = GenerativeModel("gemini-1.5-flash-001")
-
-prompt = "Why is the sky blue?"
-
-# Prompt tokens count
-response = model.count_tokens(prompt)
-print(f"Prompt Token Count: {response.total_tokens}")
-print(f"Prompt Character Count: {response.total_billable_characters}")
-
-# Send text to Gemini
-response = model.generate_content(prompt)
-
-# Response tokens count
-usage_metadata = response.usage_metadata
-print(f"Prompt Token Count: {usage_metadata.prompt_token_count}")
-print(f"Candidates Token Count: {usage_metadata.candidates_token_count}")
-print(f"Total Token Count: {usage_metadata.total_token_count}")
-```
-
-
-
-
-
-
-
```shell
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:countTokens \
-H "Content-Type: application/json" \
-H "x-litellm-api-key: Bearer sk-1234" \
-d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
```
-
-
-
-
### Tuning API
Create Fine Tuning Job
-
-
-
-
-```python
-from typing import List, Optional
-from vertexai.preview.tuning import sft
-import vertexai
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- api_transport="rest",
-)
-
-
-# TODO(developer): Update project
-vertexai.init(project=PROJECT_ID, location="us-central1")
-
-sft_tuning_job = sft.train(
- source_model="gemini-1.0-pro-002",
- train_dataset="gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl",
-)
-
-# Polling for job completion
-while not sft_tuning_job.has_ended:
- time.sleep(60)
- sft_tuning_job.refresh()
-
-print(sft_tuning_job.tuned_model_name)
-print(sft_tuning_job.tuned_model_endpoint_name)
-print(sft_tuning_job.experiment)
-
-```
-
-
-
-
-
-```python
-from typing import List, Optional
-from vertexai.preview.tuning import sft
-import vertexai
-from google.auth.credentials import Credentials
-
-LITELLM_PROXY_API_KEY = "sk-1234"
-LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
-
-import datetime
-
-
-class CredentialsWrapper(Credentials):
- def __init__(self, token=None):
- super().__init__()
- self.token = token
- self.expiry = None # or set to a future date if needed
-
- def refresh(self, request):
- pass
-
- def apply(self, headers, token=None):
- headers["Authorization"] = f"Bearer {self.token}"
-
- @property
- def expired(self):
- return False # Always consider the token as non-expired
-
- @property
- def valid(self):
- return True # Always consider the credentials as valid
-
-
-credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY)
-
-vertexai.init(
- project="adroit-crow-413218",
- location="us-central1",
- api_endpoint=LITELLM_PROXY_BASE,
- credentials=credentials,
- api_transport="rest",
-)
-
-
-# TODO(developer): Update project
-vertexai.init(project=PROJECT_ID, location="us-central1")
-
-sft_tuning_job = sft.train(
- source_model="gemini-1.0-pro-002",
- train_dataset="gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl",
-)
-
-# Polling for job completion
-while not sft_tuning_job.has_ended:
- time.sleep(60)
- sft_tuning_job.refresh()
-
-print(sft_tuning_job.tuned_model_name)
-print(sft_tuning_job.tuned_model_endpoint_name)
-print(sft_tuning_job.experiment)
-```
-
-
-
-
```shell
curl http://localhost:4000/vertex_ai/tuningJobs \
@@ -804,118 +187,6 @@ curl http://localhost:4000/vertex_ai/tuningJobs \
}'
```
-
-
-
-
-
-### Context Caching
-
-Use Vertex AI Context Caching
-
-[**Relevant VertexAI Docs**](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-overview)
-
-
-
-
-
-1. Add model to config.yaml
-```yaml
-model_list:
- # used for /chat/completions, /completions, /embeddings endpoints
- - model_name: gemini-1.5-pro-001
- litellm_params:
- model: vertex_ai/gemini-1.5-pro-001
- vertex_project: "project-id"
- vertex_location: "us-central1"
- vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
-
-# used for the /cachedContent and vertexAI native endpoints
-default_vertex_config:
- vertex_project: "adroit-crow-413218"
- vertex_location: "us-central1"
- vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
-
-```
-
-2. Start Proxy
-
-```
-$ litellm --config /path/to/config.yaml
-```
-
-3. Make Request!
-We make the request in two steps:
-- Create a cachedContents object
-- Use the cachedContents object in your /chat/completions
-
-**Create a cachedContents object**
-
-First, create a cachedContents object by calling the Vertex `cachedContents` endpoint. The LiteLLM proxy forwards the `/cachedContents` request to the VertexAI API.
-
-```python
-import httpx
-
-# Set Litellm proxy variables
-LITELLM_BASE_URL = "http://0.0.0.0:4000"
-LITELLM_PROXY_API_KEY = "sk-1234"
-
-httpx_client = httpx.Client(timeout=30)
-
-print("Creating cached content")
-create_cache = httpx_client.post(
- url=f"{LITELLM_BASE_URL}/vertex_ai/cachedContents",
- headers={"x-litellm-api-key": f"Bearer {LITELLM_PROXY_API_KEY}"},
- json={
- "model": "gemini-1.5-pro-001",
- "contents": [
- {
- "role": "user",
- "parts": [{
- "text": "This is sample text to demonstrate explicit caching." * 4000
- }]
- }
- ],
- }
-)
-
-print("Response from create_cache:", create_cache)
-create_cache_response = create_cache.json()
-print("JSON from create_cache:", create_cache_response)
-cached_content_name = create_cache_response["name"]
-```
-
-**Use the cachedContents object in your /chat/completions request to VertexAI**
-
-```python
-import openai
-
-# Set Litellm proxy variables
-LITELLM_BASE_URL = "http://0.0.0.0:4000"
-LITELLM_PROXY_API_KEY = "sk-1234"
-
-client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
-
-response = client.chat.completions.create(
- model="gemini-1.5-pro-001",
- max_tokens=8192,
- messages=[
- {
- "role": "user",
- "content": "What is the sample text about?",
- },
- ],
- temperature=0.7,
- extra_body={"cached_content": cached_content_name}, # Use the cached content
-)
-
-print("Response from proxy:", response)
-```
-
-
-
-
-
## Advanced
Pre-requisites
@@ -930,6 +201,11 @@ Use this, to avoid giving developers the raw Anthropic API key, but still lettin
```bash
export DATABASE_URL=""
export LITELLM_MASTER_KEY=""
+
+# vertex ai credentials
+export DEFAULT_VERTEXAI_PROJECT="" # "adroit-crow-413218"
+export DEFAULT_VERTEXAI_LOCATION="" # "us-central1"
+export DEFAULT_GOOGLE_APPLICATION_CREDENTIALS="" # "/Users/Downloads/adroit-crow-413218-a956eef1a2a8.json"
```
```bash