(feat) use @google-cloud/vertexai js sdk with litellm (#6873)

* stash gemini JS test * add vertex js sdk example * handle vertex pass through separately * test vertex JS sdk * fix vertex_proxy_route * use PassThroughStreamingHandler * fix PassThroughStreamingHandler * use common _create_vertex_response_logging_payload_for_generate_content * test vertex js * add working vertex jest tests * move basic pass through test * use good name for test * test vertex * test_chunk_processor_yields_raw_bytes * unit tests for streaming * test_convert_raw_bytes_to_str_lines * run unit tests 1st * simplify local * docs add usage example for js * use get_litellm_virtual_key * add unit tests for vertex pass through
2024-11-22 16:50:10 -08:00 · 2024-11-22 16:50:10 -08:00 · b2b3e40d13
commit b2b3e40d13
parent 5930c42e74
14 changed files with 680 additions and 89 deletions
--- a/tests/pass_through_tests/test_anthropic_passthrough_python_sdkpy
+++ b/tests/pass_through_tests/test_anthropic_passthrough_python_sdkpy
@ -0,0 +1,38 @@
+"""
+This test ensures that the proxy can passthrough anthropic requests
+"""
+
+import pytest
+import anthropic
+
+client = anthropic.Anthropic(
+    base_url="http://0.0.0.0:4000/anthropic", api_key="sk-1234"
+)
+
+
+def test_anthropic_basic_completion():
+    print("making basic completion request to anthropic passthrough")
+    response = client.messages.create(
+        model="claude-3-5-sonnet-20241022",
+        max_tokens=1024,
+        messages=[{"role": "user", "content": "Say 'hello test' and nothing else"}],
+    )
+    print(response)
+
+
+def test_anthropic_streaming():
+    print("making streaming request to anthropic passthrough")
+    collected_output = []
+
+    with client.messages.stream(
+        max_tokens=10,
+        messages=[
+            {"role": "user", "content": "Say 'hello stream test' and nothing else"}
+        ],
+        model="claude-3-5-sonnet-20241022",
+    ) as stream:
+        for text in stream.text_stream:
+            collected_output.append(text)
+
+    full_response = "".join(collected_output)
+    print(full_response)
--- a/tests/pass_through_tests/test_gemini.js
+++ b/tests/pass_through_tests/test_gemini.js
@ -0,0 +1,23 @@
+// const { GoogleGenerativeAI } = require("@google/generative-ai");
+
+// const genAI = new GoogleGenerativeAI("sk-1234");
+// const model = genAI.getGenerativeModel({ model: "gemini-1.5-flash" });
+
+// const prompt = "Explain how AI works in 2 pages";
+
+// async function run() {
+//     try {
+//         const result = await model.generateContentStream(prompt, { baseUrl: "http://localhost:4000/gemini" });
+//         const response = await result.response;
+//         console.log(response.text());
+//         for await (const chunk of result.stream) {
+//             const chunkText = chunk.text();
+//             console.log(chunkText);
+//             process.stdout.write(chunkText);
+//         }
+//     } catch (error) {
+//         console.error("Error:", error);
+//     }
+// }
+
+// run();
--- a/tests/pass_through_tests/test_local_vertex.js
+++ b/tests/pass_through_tests/test_local_vertex.js
@ -0,0 +1,68 @@
+const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
+
+
+// Import fetch if the SDK uses it
+const originalFetch = global.fetch || require('node-fetch');
+
+// Monkey-patch the fetch used internally
+global.fetch = async function patchedFetch(url, options) {
+    // Modify the URL to use HTTP instead of HTTPS
+    if (url.startsWith('https://localhost:4000')) {
+        url = url.replace('https://', 'http://');
+    }
+    console.log('Patched fetch sending request to:', url);
+    return originalFetch(url, options);
+};
+
+const vertexAI = new VertexAI({
+    project: 'adroit-crow-413218',
+    location: 'us-central1',
+    apiEndpoint: "localhost:4000/vertex-ai"
+});
+
+
+// Use customHeaders in RequestOptions
+const requestOptions = {
+    customHeaders: new Headers({
+        "x-litellm-api-key": "sk-1234"
+    })
+};
+
+const generativeModel = vertexAI.getGenerativeModel(
+    { model: 'gemini-1.0-pro' },
+    requestOptions
+);
+
+async function streamingResponse() {
+    try {
+        const request = {
+            contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
+          };
+        const streamingResult = await generativeModel.generateContentStream(request);
+        for await (const item of streamingResult.stream) {
+            console.log('stream chunk: ', JSON.stringify(item));
+        }
+        const aggregatedResponse = await streamingResult.response;
+        console.log('aggregated response: ', JSON.stringify(aggregatedResponse));
+    } catch (error) {
+        console.error('Error:', error);
+    }
+}
+
+
+async function nonStreamingResponse() {
+    try {
+        const request = {
+            contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
+          };
+        const response = await generativeModel.generateContent(request);
+        console.log('non streaming response: ', JSON.stringify(response));
+    } catch (error) {
+        console.error('Error:', error);
+    }
+}
+
+
+
+// Start both demos without awaiting them; each one logs its own errors.
+void streamingResponse();
+void nonStreamingResponse();
--- a/tests/pass_through_tests/test_vertex.test.js
+++ b/tests/pass_through_tests/test_vertex.test.js
@ -0,0 +1,114 @@
+const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
+const { writeFileSync } = require('fs');
+
+
+// Import fetch if the SDK uses it
+const originalFetch = global.fetch || require('node-fetch');
+
+// Monkey-patch the fetch used internally
+global.fetch = async function patchedFetch(url, options) {
+    // Modify the URL to use HTTP instead of HTTPS
+    if (url.startsWith('https://localhost:4000')) {
+        url = url.replace('https://', 'http://');
+    }
+    console.log('Patched fetch sending request to:', url);
+    return originalFetch(url, options);
+};
+
+function loadVertexAiCredentials() {
+    console.log("loading vertex ai credentials");
+    const filepath = path.dirname(__filename);
+    const vertexKeyPath = path.join(filepath, "vertex_key.json");
+
+    // Initialize default empty service account data
+    let serviceAccountKeyData = {};
+
+    // Try to read existing vertex_key.json
+    try {
+        const content = fs.readFileSync(vertexKeyPath, 'utf8');
+        if (content && content.trim()) {
+            serviceAccountKeyData = JSON.parse(content);
+        }
+    } catch (error) {
+        // File doesn't exist or is invalid, continue with empty object
+    }
+
+    // Update with environment variables
+    const privateKeyId = process.env.VERTEX_AI_PRIVATE_KEY_ID || "";
+    const privateKey = (process.env.VERTEX_AI_PRIVATE_KEY || "").replace(/\\n/g, "\n");
+    
+    serviceAccountKeyData.private_key_id = privateKeyId;
+    serviceAccountKeyData.private_key = privateKey;
+
+    // Create temporary file
+    const tempFilePath = path.join(os.tmpdir(), `vertex-credentials-${Date.now()}.json`);
+    writeFileSync(tempFilePath, JSON.stringify(serviceAccountKeyData, null, 2));
+    
+    // Set environment variable
+    process.env.GOOGLE_APPLICATION_CREDENTIALS = tempFilePath;
+}
+
+// Run credential loading before tests
+beforeAll(() => {
+    loadVertexAiCredentials();
+});
+
+
+
+describe('Vertex AI Tests', () => {
+    test('should successfully generate content from Vertex AI', async () => {
+        const vertexAI = new VertexAI({
+            project: 'adroit-crow-413218',
+            location: 'us-central1',
+            apiEndpoint: "localhost:4000/vertex-ai"
+        });
+
+        const customHeaders = new Headers({
+            "x-litellm-api-key": "sk-1234"
+        });
+
+        const requestOptions = {
+            customHeaders: customHeaders
+        };
+
+        const generativeModel = vertexAI.getGenerativeModel(
+            { model: 'gemini-1.0-pro' },
+            requestOptions
+        );
+
+        const request = {
+            contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
+        };
+
+        const streamingResult = await generativeModel.generateContentStream(request);
+        
+        // Add some assertions
+        expect(streamingResult).toBeDefined();
+        
+        for await (const item of streamingResult.stream) {
+            console.log('stream chunk:', JSON.stringify(item));
+            expect(item).toBeDefined();
+        }
+
+        const aggregatedResponse = await streamingResult.response;
+        console.log('aggregated response:', JSON.stringify(aggregatedResponse));
+        expect(aggregatedResponse).toBeDefined();
+    });
+
+
+    test('should successfully generate non-streaming content from Vertex AI', async () => {
+        const vertexAI = new VertexAI({project: 'adroit-crow-413218', location: 'us-central1', apiEndpoint: "localhost:4000/vertex-ai"});
+        const customHeaders = new Headers({"x-litellm-api-key": "sk-1234"});
+        const requestOptions = {customHeaders: customHeaders};
+        const generativeModel = vertexAI.getGenerativeModel({model: 'gemini-1.0-pro'}, requestOptions);
+        const request = {contents: [{role: 'user', parts: [{text: 'What is 2+2?'}]}]};
+
+        const result = await generativeModel.generateContent(request);
+        expect(result).toBeDefined();
+        expect(result.response).toBeDefined();
+        console.log('non-streaming response:', JSON.stringify(result.response));
+    });
+});