feat - allow sending tags on vertex pass through requests (#6876)

* feat - allow tagging vertex JS SDK request

* add unit testing for passing headers for pass through endpoints

* fix allow using vertex_ai as the primary way for pass through vertex endpoints

* docs on vertex js pass tags

* add e2e test for vertex pass through with spend tags

* add e2e tests for streaming vertex JS with tags

* fix vertex ai testing
Ishaan Jaff, 2024-11-25 12:12:09 -08:00, committed by GitHub
parent c73ce95c01
commit f77bf49772
7 changed files with 548 additions and 77 deletions

@@ -4,17 +4,9 @@ import TabItem from '@theme/TabItem';
# Vertex AI SDK
Use the VertexAI SDK to call endpoints on the LiteLLM Gateway (native provider format).
:::tip
Looking for the Unified API (OpenAI format) for VertexAI? [Go here - using vertexAI with LiteLLM SDK or LiteLLM Proxy Server](../providers/vertex.md)
:::
Pass-through endpoints for Vertex AI - call the provider-specific endpoint, in native format (no translation).
Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE_URL/vertex_ai`.
#### **Example Usage**
@@ -23,9 +15,9 @@ Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE
<TabItem value="curl" label="curl">
```bash
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-H "x-litellm-api-key: Bearer sk-1234" \
-d '{
"contents":[{
"role": "user",
@@ -43,7 +35,7 @@ const { VertexAI } = require('@google-cloud/vertexai');
const vertexAI = new VertexAI({
project: 'your-project-id', // enter your vertex project id
location: 'us-central1', // enter your vertex region
apiEndpoint: "localhost:4000/vertex-ai" // <proxy-server-url>/vertex-ai # note, do not include 'https://' in the url
apiEndpoint: "localhost:4000/vertex_ai" // <proxy-server-url>/vertex_ai # note, do not include 'https://' in the url
});
const model = vertexAI.getGenerativeModel({
@@ -87,7 +79,7 @@ generateContent();
- Tuning API
- CountTokens API
#### Authentication to Vertex AI
LiteLLM Proxy Server supports two methods of authentication to Vertex AI:
@@ -116,9 +108,9 @@ from vertexai.preview.generative_models import GenerativeModel
LITE_LLM_ENDPOINT = "http://localhost:4000"
vertexai.init(
project="<your-vertex-ai-project-id>", # enter your project id
location="<your-vertex-ai-location>", # enter your region
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex-ai", # route on litellm
project="<your-vertex_ai-project-id>", # enter your project id
location="<your-vertex_ai-location>", # enter your region
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai", # route on litellm
api_transport="rest",
)
@@ -158,7 +150,7 @@ from google.auth.credentials import Credentials
from vertexai.generative_models import GenerativeModel
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
import datetime
@@ -219,7 +211,7 @@ import vertexai
from vertexai.generative_models import GenerativeModel
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
vertexai.init(
project="adroit-crow-413218",
@@ -247,7 +239,7 @@ from google.auth.credentials import Credentials
from vertexai.generative_models import GenerativeModel
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
import datetime
@@ -297,9 +289,9 @@ print(response.text)
<TabItem value="Curl" label="Curl">
```shell
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:generateContent \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-H "x-litellm-api-key: Bearer sk-1234" \
-d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
```
@@ -320,7 +312,7 @@ import vertexai
from vertexai.generative_models import GenerativeModel
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
import datetime
@@ -358,7 +350,7 @@ from google.auth.credentials import Credentials
from vertexai.generative_models import GenerativeModel
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
import datetime
@@ -413,9 +405,9 @@ def embed_text(
<TabItem value="curl" label="Curl">
```shell
curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-gecko@001:predict \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-H "x-litellm-api-key: Bearer sk-1234" \
-d '{"instances":[{"content": "gm"}]}'
```
@@ -437,7 +429,7 @@ import vertexai
from google.auth.credentials import Credentials
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
import datetime
@@ -482,7 +474,7 @@ import vertexai
from google.auth.credentials import Credentials
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
import datetime
@@ -547,9 +539,9 @@ print(f"Created output image using {len(images[0]._image_bytes)} bytes")
<TabItem value="curl" label="Curl">
```shell
curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generate-001:predict \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-H "x-litellm-api-key: Bearer sk-1234" \
-d '{"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}'
```
@@ -571,7 +563,7 @@ from vertexai.generative_models import GenerativeModel
import vertexai
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
import datetime
@@ -614,7 +606,7 @@ import vertexai
from google.auth.credentials import Credentials
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
import datetime
@@ -677,9 +669,9 @@ print(f"Total Token Count: {usage_metadata.total_token_count}")
```shell
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:countTokens \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-H "x-litellm-api-key: Bearer sk-1234" \
-d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
```
@@ -700,7 +692,7 @@ from vertexai.preview.tuning import sft
import vertexai
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
vertexai.init(
@@ -741,7 +733,7 @@ import vertexai
from google.auth.credentials import Credentials
LITELLM_PROXY_API_KEY = "sk-1234"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
import datetime
@@ -801,9 +793,9 @@ print(sft_tuning_job.experiment)
<TabItem value="curl" label="Curl">
```shell
curl http://localhost:4000/vertex_ai/tuningJobs \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-H "x-litellm-api-key: Bearer sk-1234" \
-d '{
"baseModel": "gemini-1.0-pro-002",
"supervisedTuningSpec" : {
@@ -872,8 +864,8 @@ httpx_client = httpx.Client(timeout=30)
print("Creating cached content")
create_cache = httpx_client.post(
url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
url=f"{LITELLM_BASE_URL}/vertex_ai/cachedContents",
headers={"x-litellm-api-key": f"Bearer {LITELLM_PROXY_API_KEY}"},
json={
"model": "gemini-1.5-pro-001",
"contents": [
@@ -920,5 +912,130 @@ response = client.chat.completions.create(
print("Response from proxy:", response)
```
</TabItem>
</Tabs>
## Advanced
Pre-requisites
- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
Use this to avoid giving developers your raw Vertex AI credentials, while still letting them call Vertex AI endpoints.
### Use with Virtual Keys
1. Setup environment
```bash
export DATABASE_URL=""
export LITELLM_MASTER_KEY=""
```
```bash
litellm
# RUNNING on http://0.0.0.0:4000
```
2. Generate virtual key
```bash
curl -X POST 'http://0.0.0.0:4000/key/generate' \
-H 'x-litellm-api-key: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{}'
```
Expected Response
```json
{
...
"key": "sk-1234ewknldferwedojwojw"
}
```
3. Test it!
```bash
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
-H "Content-Type: application/json" \
-H "x-litellm-api-key: Bearer sk-1234" \
-d '{
"contents":[{
"role": "user",
"parts":[{"text": "How are you doing today?"}]
}]
}'
```
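
If you'd rather script steps 2-3 instead of using curl, here is a minimal end-to-end sketch in Python using `httpx` (any HTTP client works). It assumes the proxy from step 1 is running on `localhost:4000` with master key `sk-1234`, and uses the same header names as the curl examples above:

```python
import httpx

PROXY_BASE = "http://localhost:4000"
MASTER_KEY = "sk-1234"  # assumption: matches LITELLM_MASTER_KEY from step 1

# Step 2: generate a virtual key
key_resp = httpx.post(
    f"{PROXY_BASE}/key/generate",
    headers={"x-litellm-api-key": f"Bearer {MASTER_KEY}"},
    json={},
)
virtual_key = key_resp.json()["key"]

# Step 3: call the Vertex AI pass-through endpoint with the virtual key
resp = httpx.post(
    f"{PROXY_BASE}/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent",
    headers={"x-litellm-api-key": f"Bearer {virtual_key}"},
    json={"contents": [{"role": "user", "parts": [{"text": "How are you doing today?"}]}]},
)
print(resp.json())
```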
### Send `tags` in request headers
Use this if you want `tags` to be tracked in the LiteLLM DB and on logging callbacks.
Pass `tags` in the request headers as a comma-separated list. In the example below, the following tags will be tracked:
```
tags: ["vertex-js-sdk", "pass-through-endpoint"]
```
<Tabs>
<TabItem value="curl" label="curl">
```bash
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
-H "Content-Type: application/json" \
-H "x-litellm-api-key: Bearer sk-1234" \
-H "tags: vertex-js-sdk,pass-through-endpoint" \
-d '{
"contents":[{
"role": "user",
"parts":[{"text": "How are you doing today?"}]
}]
}'
```
</TabItem>
<TabItem value="js" label="Vertex Node.js SDK">
```javascript
const { VertexAI } = require('@google-cloud/vertexai');
const vertexAI = new VertexAI({
project: 'your-project-id', // enter your vertex project id
location: 'us-central1', // enter your vertex region
apiEndpoint: "localhost:4000/vertex_ai" // <proxy-server-url>/vertex_ai # note, do not include 'https://' in the url
});
const model = vertexAI.getGenerativeModel({
model: 'gemini-1.0-pro'
}, {
customHeaders: {
"x-litellm-api-key": "sk-1234", // Your litellm Virtual Key
"tags": "vertex-js-sdk,pass-through-endpoint"
}
});
async function generateContent() {
try {
const prompt = {
contents: [{
role: 'user',
parts: [{ text: 'How are you doing today?' }]
}]
};
const response = await model.generateContent(prompt);
console.log('Response:', response);
} catch (error) {
console.error('Error:', error);
}
}
generateContent();
```
</TabItem>
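<TabItem value="python" label="Python (httpx)">

A minimal sketch of the same tagged request from Python using `httpx` (assumes the proxy, virtual key, and `tags` header format shown in the curl tab):

```python
import httpx

response = httpx.post(
    "http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent",
    headers={
        "x-litellm-api-key": "Bearer sk-1234",  # your LiteLLM virtual key
        "tags": "vertex-js-sdk,pass-through-endpoint",  # comma-separated list of tags
    },
    json={"contents": [{"role": "user", "parts": [{"text": "How are you doing today?"}]}]},
)
print(response.json())
```

</TabItem>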
</Tabs>
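
To confirm the tags were recorded, you can query the proxy's spend logs. A sketch, assuming the `/spend/logs` endpoint from the LiteLLM spend-tracking docs and that each log row surfaces the tags in a `request_tags` field:

```python
import httpx

logs = httpx.get(
    "http://localhost:4000/spend/logs",
    headers={"x-litellm-api-key": "Bearer sk-1234"},
).json()

for row in logs:
    # assumption: each spend-log row carries the tags sent on the request
    print(row.get("request_id"), row.get("request_tags"))
```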