forked from phoenix/litellm-mirror
docs on vertex js pass tags
This commit is contained in:
parent
5baf33bf8f
commit
80045112d0
1 changed files with 156 additions and 39 deletions
|
@ -4,17 +4,9 @@ import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Vertex AI SDK
|
# Vertex AI SDK
|
||||||
|
|
||||||
Use VertexAI SDK to call endpoints on LiteLLM Gateway (native provider format)
|
|
||||||
|
|
||||||
:::tip
|
|
||||||
|
|
||||||
Looking for the Unified API (OpenAI format) for VertexAI ? [Go here - using vertexAI with LiteLLM SDK or LiteLLM Proxy Server](../providers/vertex.md)
|
|
||||||
|
|
||||||
:::
|
|
||||||
|
|
||||||
Pass-through endpoints for Vertex AI - call provider-specific endpoint, in native format (no translation).
|
Pass-through endpoints for Vertex AI - call provider-specific endpoint, in native format (no translation).
|
||||||
|
|
||||||
Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE_URL/vertex-ai`
|
Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE_URL/vertex_ai`
|
||||||
|
|
||||||
|
|
||||||
#### **Example Usage**
|
#### **Example Usage**
|
||||||
|
@ -23,9 +15,9 @@ Just replace `https://REGION-aiplatform.googleapis.com` with `LITELLM_PROXY_BASE
|
||||||
<TabItem value="curl" label="curl">
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.0-pro:generateContent \
|
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-H "Authorization: Bearer sk-1234" \
|
-H "x-litellm-api-key: Bearer sk-1234" \
|
||||||
-d '{
|
-d '{
|
||||||
"contents":[{
|
"contents":[{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
|
@ -43,7 +35,7 @@ const { VertexAI } = require('@google-cloud/vertexai');
|
||||||
const vertexAI = new VertexAI({
|
const vertexAI = new VertexAI({
|
||||||
project: 'your-project-id', // enter your vertex project id
|
project: 'your-project-id', // enter your vertex project id
|
||||||
location: 'us-central1', // enter your vertex region
|
location: 'us-central1', // enter your vertex region
|
||||||
apiEndpoint: "localhost:4000/vertex-ai" // <proxy-server-url>/vertex-ai # note, do not include 'https://' in the url
|
apiEndpoint: "localhost:4000/vertex_ai" // <proxy-server-url>/vertex_ai # note, do not include 'https://' in the url
|
||||||
});
|
});
|
||||||
|
|
||||||
const model = vertexAI.getGenerativeModel({
|
const model = vertexAI.getGenerativeModel({
|
||||||
|
@ -87,7 +79,7 @@ generateContent();
|
||||||
- Tuning API
|
- Tuning API
|
||||||
- CountTokens API
|
- CountTokens API
|
||||||
|
|
||||||
## Authentication to Vertex AI
|
#### Authentication to Vertex AI
|
||||||
|
|
||||||
LiteLLM Proxy Server supports two methods of authentication to Vertex AI:
|
LiteLLM Proxy Server supports two methods of authentication to Vertex AI:
|
||||||
|
|
||||||
|
@ -116,9 +108,9 @@ from vertexai.preview.generative_models import GenerativeModel
|
||||||
LITE_LLM_ENDPOINT = "http://localhost:4000"
|
LITE_LLM_ENDPOINT = "http://localhost:4000"
|
||||||
|
|
||||||
vertexai.init(
|
vertexai.init(
|
||||||
project="<your-vertex-ai-project-id>", # enter your project id
|
project="<your-vertex_ai-project-id>", # enter your project id
|
||||||
location="<your-vertex-ai-location>", # enter your region
|
location="<your-vertex_ai-location>", # enter your region
|
||||||
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex-ai", # route on litellm
|
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai", # route on litellm
|
||||||
api_transport="rest",
|
api_transport="rest",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -158,7 +150,7 @@ from google.auth.credentials import Credentials
|
||||||
from vertexai.generative_models import GenerativeModel
|
from vertexai.generative_models import GenerativeModel
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
@ -219,7 +211,7 @@ import vertexai
|
||||||
from vertexai.generative_models import GenerativeModel
|
from vertexai.generative_models import GenerativeModel
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
vertexai.init(
|
vertexai.init(
|
||||||
project="adroit-crow-413218",
|
project="adroit-crow-413218",
|
||||||
|
@ -247,7 +239,7 @@ from google.auth.credentials import Credentials
|
||||||
from vertexai.generative_models import GenerativeModel
|
from vertexai.generative_models import GenerativeModel
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
@ -297,9 +289,9 @@ print(response.text)
|
||||||
<TabItem value="Curl" label="Curl">
|
<TabItem value="Curl" label="Curl">
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:generateContent \
|
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:generateContent \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-H "Authorization: Bearer sk-1234" \
|
-H "x-litellm-api-key: Bearer sk-1234" \
|
||||||
-d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
|
-d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -320,7 +312,7 @@ import vertexai
|
||||||
from vertexai.generative_models import GenerativeModel
|
from vertexai.generative_models import GenerativeModel
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
@ -358,7 +350,7 @@ from google.auth.credentials import Credentials
|
||||||
from vertexai.generative_models import GenerativeModel
|
from vertexai.generative_models import GenerativeModel
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
@ -413,9 +405,9 @@ def embed_text(
|
||||||
<TabItem value="curl" label="Curl">
|
<TabItem value="curl" label="Curl">
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \
|
curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-gecko@001:predict \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-H "Authorization: Bearer sk-1234" \
|
-H "x-litellm-api-key: Bearer sk-1234" \
|
||||||
-d '{"instances":[{"content": "gm"}]}'
|
-d '{"instances":[{"content": "gm"}]}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -437,7 +429,7 @@ import vertexai
|
||||||
from google.auth.credentials import Credentials
|
from google.auth.credentials import Credentials
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
@ -482,7 +474,7 @@ import vertexai
|
||||||
from google.auth.credentials import Credentials
|
from google.auth.credentials import Credentials
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
@ -547,9 +539,9 @@ print(f"Created output image using {len(images[0]._image_bytes)} bytes")
|
||||||
<TabItem value="curl" label="Curl">
|
<TabItem value="curl" label="Curl">
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:4000/vertex-ai/publishers/google/models/imagen-3.0-generate-001:predict \
|
curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generate-001:predict \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-H "Authorization: Bearer sk-1234" \
|
-H "x-litellm-api-key: Bearer sk-1234" \
|
||||||
-d '{"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}'
|
-d '{"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -571,7 +563,7 @@ from vertexai.generative_models import GenerativeModel
|
||||||
import vertexai
|
import vertexai
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
@ -614,7 +606,7 @@ import vertexai
|
||||||
from google.auth.credentials import Credentials
|
from google.auth.credentials import Credentials
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
@ -677,9 +669,9 @@ print(f"Total Token Count: {usage_metadata.total_token_count}")
|
||||||
|
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:countTokens \
|
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:countTokens \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-H "Authorization: Bearer sk-1234" \
|
-H "x-litellm-api-key: Bearer sk-1234" \
|
||||||
-d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
|
-d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -700,7 +692,7 @@ from vertexai.preview.tuning import sft
|
||||||
import vertexai
|
import vertexai
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
|
|
||||||
vertexai.init(
|
vertexai.init(
|
||||||
|
@ -741,7 +733,7 @@ import vertexai
|
||||||
from google.auth.credentials import Credentials
|
from google.auth.credentials import Credentials
|
||||||
|
|
||||||
LITELLM_PROXY_API_KEY = "sk-1234"
|
LITELLM_PROXY_API_KEY = "sk-1234"
|
||||||
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex-ai"
|
LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai"
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
@ -801,9 +793,9 @@ print(sft_tuning_job.experiment)
|
||||||
<TabItem value="curl" label="Curl">
|
<TabItem value="curl" label="Curl">
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:4000/vertex-ai/tuningJobs \
|
curl http://localhost:4000/vertex_ai/tuningJobs \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-H "Authorization: Bearer sk-1234" \
|
-H "x-litellm-api-key: Bearer sk-1234" \
|
||||||
-d '{
|
-d '{
|
||||||
"baseModel": "gemini-1.0-pro-002",
|
"baseModel": "gemini-1.0-pro-002",
|
||||||
"supervisedTuningSpec" : {
|
"supervisedTuningSpec" : {
|
||||||
|
@ -872,8 +864,8 @@ httpx_client = httpx.Client(timeout=30)
|
||||||
|
|
||||||
print("Creating cached content")
|
print("Creating cached content")
|
||||||
create_cache = httpx_client.post(
|
create_cache = httpx_client.post(
|
||||||
url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
|
url=f"{LITELLM_BASE_URL}/vertex_ai/cachedContents",
|
||||||
headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
|
headers={"x-litellm-api-key": f"Bearer {LITELLM_PROXY_API_KEY}"},
|
||||||
json={
|
json={
|
||||||
"model": "gemini-1.5-pro-001",
|
"model": "gemini-1.5-pro-001",
|
||||||
"contents": [
|
"contents": [
|
||||||
|
@ -920,5 +912,130 @@ response = client.chat.completions.create(
|
||||||
print("Response from proxy:", response)
|
print("Response from proxy:", response)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
## Advanced
|
||||||
|
|
||||||
|
Pre-requisites
|
||||||
|
- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
|
||||||
|
|
||||||
|
Use this to avoid giving developers the raw Vertex AI credentials, while still letting them use Vertex AI endpoints.
|
||||||
|
|
||||||
|
### Use with Virtual Keys
|
||||||
|
|
||||||
|
1. Setup environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export DATABASE_URL=""
|
||||||
|
export LITELLM_MASTER_KEY=""
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Generate virtual key
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://0.0.0.0:4000/key/generate' \
|
||||||
|
-H 'x-litellm-api-key: Bearer sk-1234' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected Response
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"key": "sk-1234ewknldferwedojwojw"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "x-litellm-api-key: Bearer sk-1234ewknldferwedojwojw" \
|
||||||
|
-d '{
|
||||||
|
"contents":[{
|
||||||
|
"role": "user",
|
||||||
|
"parts":[{"text": "How are you doing today?"}]
|
||||||
|
}]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Send `tags` in request headers
|
||||||
|
|
||||||
|
Use this if you want `tags` to be tracked in the LiteLLM DB and on logging callbacks
|
||||||
|
|
||||||
|
Pass `tags` in request headers as a comma-separated list. In the example below, the following tags will be tracked:
|
||||||
|
|
||||||
|
```
|
||||||
|
tags: ["vertex-js-sdk", "pass-through-endpoint"]
|
||||||
|
```
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.0-pro:generateContent \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "x-litellm-api-key: Bearer sk-1234" \
|
||||||
|
-H "tags: vertex-js-sdk,pass-through-endpoint" \
|
||||||
|
-d '{
|
||||||
|
"contents":[{
|
||||||
|
"role": "user",
|
||||||
|
"parts":[{"text": "How are you doing today?"}]
|
||||||
|
}]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="js" label="Vertex Node.js SDK">
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
const { VertexAI } = require('@google-cloud/vertexai');
|
||||||
|
|
||||||
|
const vertexAI = new VertexAI({
|
||||||
|
project: 'your-project-id', // enter your vertex project id
|
||||||
|
location: 'us-central1', // enter your vertex region
|
||||||
|
apiEndpoint: "localhost:4000/vertex_ai" // <proxy-server-url>/vertex_ai # note, do not include 'https://' in the url
|
||||||
|
});
|
||||||
|
|
||||||
|
const model = vertexAI.getGenerativeModel({
|
||||||
|
model: 'gemini-1.0-pro'
|
||||||
|
}, {
|
||||||
|
customHeaders: {
|
||||||
|
"x-litellm-api-key": "sk-1234", // Your litellm Virtual Key
|
||||||
|
"tags": "vertex-js-sdk,pass-through-endpoint"
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
async function generateContent() {
|
||||||
|
try {
|
||||||
|
const prompt = {
|
||||||
|
contents: [{
|
||||||
|
role: 'user',
|
||||||
|
parts: [{ text: 'How are you doing today?' }]
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
|
||||||
|
const response = await model.generateContent(prompt);
|
||||||
|
console.log('Response:', response);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
generateContent();
|
||||||
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
Loading…
Add table
Add a link
Reference in a new issue