(feat) use @google-cloud/vertexai js sdk with litellm (#6873)

* stash gemini JS test * add vertex js sdj example * handle vertex pass through separately * tes vertex JS sdk * fix vertex_proxy_route * use PassThroughStreamingHandler * fix PassThroughStreamingHandler * use common _create_vertex_response_logging_payload_for_generate_content * test vertex js * add working vertex jest tests * move basic bass through test * use good name for test * test vertex * test_chunk_processor_yields_raw_bytes * unit tests for streaming * test_convert_raw_bytes_to_str_lines * run unit tests 1st * simplify local * docs add usage example for js * use get_litellm_virtual_key * add unit tests for vertex pass through
2024-11-22 16:50:10 -08:00 · 2024-11-22 16:50:10 -08:00 · b2b3e40d13
commit b2b3e40d13
parent 5930c42e74
14 changed files with 680 additions and 89 deletions
--- a/tests/pass_through_tests/test_local_vertex.js
+++ b/tests/pass_through_tests/test_local_vertex.js
@ -0,0 +1,68 @@
+const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
+
+
+// Import fetch if the SDK uses it
+const originalFetch = global.fetch || require('node-fetch');
+
+// Monkey-patch the fetch used internally
+global.fetch = async function patchedFetch(url, options) {
+    // Modify the URL to use HTTP instead of HTTPS
+    if (url.startsWith('https://localhost:4000')) {
+        url = url.replace('https://', 'http://');
+    }
+    console.log('Patched fetch sending request to:', url);
+    return originalFetch(url, options);
+};
+
+const vertexAI = new VertexAI({
+    project: 'adroit-crow-413218',
+    location: 'us-central1',
+    apiEndpoint: "localhost:4000/vertex-ai"
+});
+
+
+// Use customHeaders in RequestOptions
+const requestOptions = {
+    customHeaders: new Headers({
+        "x-litellm-api-key": "sk-1234"
+    })
+};
+
+const generativeModel = vertexAI.getGenerativeModel(
+    { model: 'gemini-1.0-pro' },
+    requestOptions
+);
+
+async function streamingResponse() {
+    try {
+        const request = {
+            contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
+          };
+        const streamingResult = await generativeModel.generateContentStream(request);
+        for await (const item of streamingResult.stream) {
+            console.log('stream chunk: ', JSON.stringify(item));
+        }
+        const aggregatedResponse = await streamingResult.response;
+        console.log('aggregated response: ', JSON.stringify(aggregatedResponse));
+    } catch (error) {
+        console.error('Error:', error);
+    }
+}
+
+
+async function nonStreamingResponse() {
+    try {
+        const request = {
+            contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
+          };
+        const response = await generativeModel.generateContent(request);
+        console.log('non streaming response: ', JSON.stringify(response));
+    } catch (error) {
+        console.error('Error:', error);
+    }
+}
+
+
+
+streamingResponse();
+nonStreamingResponse();