(feat) use @google-cloud/vertexai js sdk with litellm (#6873)

* stash gemini JS test * add vertex js sdk example * handle vertex pass through separately * test vertex JS sdk * fix vertex_proxy_route * use PassThroughStreamingHandler * fix PassThroughStreamingHandler * use common _create_vertex_response_logging_payload_for_generate_content * test vertex js * add working vertex jest tests * move basic pass through test * use good name for test * test vertex * test_chunk_processor_yields_raw_bytes * unit tests for streaming * test_convert_raw_bytes_to_str_lines * run unit tests 1st * simplify local * docs add usage example for js * use get_litellm_virtual_key * add unit tests for vertex pass through
2024-11-22 16:50:10 -08:00 · 2024-11-22 16:50:10 -08:00 · b2b3e40d13
commit b2b3e40d13
parent 5930c42e74
14 changed files with 680 additions and 89 deletions
--- a/tests/pass_through_tests/test_anthropic_passthrough_python_sdkpy
+++ b/tests/pass_through_tests/test_anthropic_passthrough_python_sdkpy
--- a/tests/pass_through_tests/test_gemini.js
+++ b/tests/pass_through_tests/test_gemini.js
@ -0,0 +1,23 @@
+// const { GoogleGenerativeAI } = require("@google/generative-ai");
+
+// const genAI = new GoogleGenerativeAI("sk-1234");
+// const model = genAI.getGenerativeModel({ model: "gemini-1.5-flash" });
+
+// const prompt = "Explain how AI works in 2 pages";
+
+// async function run() {
+//     try {
+//         const result = await model.generateContentStream(prompt, { baseUrl: "http://localhost:4000/gemini" });
+//         const response = await result.response;
+//         console.log(response.text());
+//         for await (const chunk of result.stream) {
+//             const chunkText = chunk.text();
+//             console.log(chunkText);
+//             process.stdout.write(chunkText);
+//         }
+//     } catch (error) {
+//         console.error("Error:", error);
+//     }
+// }
+
+// run();
--- a/tests/pass_through_tests/test_local_vertex.js
+++ b/tests/pass_through_tests/test_local_vertex.js
@ -0,0 +1,68 @@
+const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
+
+
+// Import fetch if the SDK uses it
+const originalFetch = global.fetch || require('node-fetch');
+
+// Monkey-patch the fetch used internally
+global.fetch = async function patchedFetch(url, options) {
+    // Modify the URL to use HTTP instead of HTTPS
+    if (url.startsWith('https://localhost:4000')) {
+        url = url.replace('https://', 'http://');
+    }
+    console.log('Patched fetch sending request to:', url);
+    return originalFetch(url, options);
+};
+
+const vertexAI = new VertexAI({
+    project: 'adroit-crow-413218',
+    location: 'us-central1',
+    apiEndpoint: "localhost:4000/vertex-ai"
+});
+
+
+// Use customHeaders in RequestOptions
+const requestOptions = {
+    customHeaders: new Headers({
+        "x-litellm-api-key": "sk-1234"
+    })
+};
+
+const generativeModel = vertexAI.getGenerativeModel(
+    { model: 'gemini-1.0-pro' },
+    requestOptions
+);
+
+async function streamingResponse() {
+    try {
+        const request = {
+            contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
+          };
+        const streamingResult = await generativeModel.generateContentStream(request);
+        for await (const item of streamingResult.stream) {
+            console.log('stream chunk: ', JSON.stringify(item));
+        }
+        const aggregatedResponse = await streamingResult.response;
+        console.log('aggregated response: ', JSON.stringify(aggregatedResponse));
+    } catch (error) {
+        console.error('Error:', error);
+    }
+}
+
+
+async function nonStreamingResponse() {
+    try {
+        const request = {
+            contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
+          };
+        const response = await generativeModel.generateContent(request);
+        console.log('non streaming response: ', JSON.stringify(response));
+    } catch (error) {
+        console.error('Error:', error);
+    }
+}
+
+
+
+streamingResponse(); // not awaited: the call below starts before this one finishes; errors are caught and logged inside the function
+nonStreamingResponse(); // not awaited either; it also catches and logs its own errors
--- a/tests/pass_through_tests/test_vertex.test.js
+++ b/tests/pass_through_tests/test_vertex.test.js
@ -0,0 +1,114 @@
+const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
+const { writeFileSync } = require('fs');
+
+
+// Import fetch if the SDK uses it
+const originalFetch = global.fetch || require('node-fetch');
+
+// Monkey-patch the fetch used internally
+global.fetch = async function patchedFetch(url, options) {
+    // Modify the URL to use HTTP instead of HTTPS
+    if (url.startsWith('https://localhost:4000')) {
+        url = url.replace('https://', 'http://');
+    }
+    console.log('Patched fetch sending request to:', url);
+    return originalFetch(url, options);
+};
+
+function loadVertexAiCredentials() {
+    console.log("loading vertex ai credentials");
+    const filepath = path.dirname(__filename);
+    const vertexKeyPath = path.join(filepath, "vertex_key.json");
+
+    // Initialize default empty service account data
+    let serviceAccountKeyData = {};
+
+    // Try to read existing vertex_key.json
+    try {
+        const content = fs.readFileSync(vertexKeyPath, 'utf8');
+        if (content && content.trim()) {
+            serviceAccountKeyData = JSON.parse(content);
+        }
+    } catch (error) {
+        // File doesn't exist or is invalid, continue with empty object
+    }
+
+    // Update with environment variables
+    const privateKeyId = process.env.VERTEX_AI_PRIVATE_KEY_ID || "";
+    const privateKey = (process.env.VERTEX_AI_PRIVATE_KEY || "").replace(/\\n/g, "\n");
+    
+    serviceAccountKeyData.private_key_id = privateKeyId;
+    serviceAccountKeyData.private_key = privateKey;
+
+    // Create temporary file
+    const tempFilePath = path.join(os.tmpdir(), `vertex-credentials-${Date.now()}.json`);
+    writeFileSync(tempFilePath, JSON.stringify(serviceAccountKeyData, null, 2));
+    
+    // Set environment variable
+    process.env.GOOGLE_APPLICATION_CREDENTIALS = tempFilePath;
+}
+
+// Run credential loading before tests
+beforeAll(() => {
+    loadVertexAiCredentials();
+});
+
+
+
+describe('Vertex AI Tests', () => {
+    test('should successfully generate content from Vertex AI', async () => {
+        const vertexAI = new VertexAI({
+            project: 'adroit-crow-413218',
+            location: 'us-central1',
+            apiEndpoint: "localhost:4000/vertex-ai"
+        });
+
+        const customHeaders = new Headers({
+            "x-litellm-api-key": "sk-1234"
+        });
+
+        const requestOptions = {
+            customHeaders: customHeaders
+        };
+
+        const generativeModel = vertexAI.getGenerativeModel(
+            { model: 'gemini-1.0-pro' },
+            requestOptions
+        );
+
+        const request = {
+            contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
+        };
+
+        const streamingResult = await generativeModel.generateContentStream(request);
+        
+        // Add some assertions
+        expect(streamingResult).toBeDefined();
+        
+        for await (const item of streamingResult.stream) {
+            console.log('stream chunk:', JSON.stringify(item));
+            expect(item).toBeDefined();
+        }
+
+        const aggregatedResponse = await streamingResult.response;
+        console.log('aggregated response:', JSON.stringify(aggregatedResponse));
+        expect(aggregatedResponse).toBeDefined();
+    });
+
+
+    test('should successfully generate non-streaming content from Vertex AI', async () => {
+        const vertexAI = new VertexAI({project: 'adroit-crow-413218', location: 'us-central1', apiEndpoint: "localhost:4000/vertex-ai"});
+        const customHeaders = new Headers({"x-litellm-api-key": "sk-1234"});
+        const requestOptions = {customHeaders: customHeaders};
+        const generativeModel = vertexAI.getGenerativeModel({model: 'gemini-1.0-pro'}, requestOptions);
+        const request = {contents: [{role: 'user', parts: [{text: 'What is 2+2?'}]}]};
+
+        const result = await generativeModel.generateContent(request);
+        expect(result).toBeDefined();
+        expect(result.response).toBeDefined();
+        console.log('non-streaming response:', JSON.stringify(result.response));
+    });
+});
--- a/tests/pass_through_unit_tests/test_unit_test_anthropic_pass_through.py
+++ b/tests/pass_through_unit_tests/test_unit_test_anthropic_pass_through.py
--- a/tests/pass_through_unit_tests/test_unit_test_streaming.py
+++ b/tests/pass_through_unit_tests/test_unit_test_streaming.py
@ -0,0 +1,118 @@
+import json
+import os
+import sys
+from datetime import datetime
+from unittest.mock import AsyncMock, Mock, patch, MagicMock
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the directory two levels above the current working directory to the system path
+
+import httpx
+import pytest
+import litellm
+from typing import AsyncGenerator
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.proxy.pass_through_endpoints.types import EndpointType
+from litellm.proxy.pass_through_endpoints.success_handler import (
+    PassThroughEndpointLogging,
+)
+from litellm.proxy.pass_through_endpoints.streaming_handler import (
+    PassThroughStreamingHandler,
+)
+
+
+# Helper function to mock async iteration
+async def aiter_mock(iterable):
+    for item in iterable:
+        yield item
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "endpoint_type,url_route",
+    [
+        (
+            EndpointType.VERTEX_AI,
+            "v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro:generateContent",
+        ),
+        (EndpointType.ANTHROPIC, "/v1/messages"),
+    ],
+)
+async def test_chunk_processor_yields_raw_bytes(endpoint_type, url_route):
+    """
+    Test that the chunk_processor yields raw bytes
+
+    This is CRITICAL for pass throughs streaming with Vertex AI and Anthropic
+    """
+    # Mock inputs
+    response = AsyncMock(spec=httpx.Response)
+    raw_chunks = [
+        b'{"id": "1", "content": "Hello"}',
+        b'{"id": "2", "content": "World"}',
+        b'\n\ndata: {"id": "3"}',  # Testing different byte formats
+    ]
+
+    # Mock aiter_bytes to return an async generator
+    async def mock_aiter_bytes():
+        for chunk in raw_chunks:
+            yield chunk
+
+    response.aiter_bytes = mock_aiter_bytes
+
+    request_body = {"key": "value"}
+    litellm_logging_obj = MagicMock()
+    start_time = datetime.now()
+    passthrough_success_handler_obj = MagicMock()
+
+    # Capture yielded chunks and perform detailed assertions
+    received_chunks = []
+    async for chunk in PassThroughStreamingHandler.chunk_processor(
+        response=response,
+        request_body=request_body,
+        litellm_logging_obj=litellm_logging_obj,
+        endpoint_type=endpoint_type,
+        start_time=start_time,
+        passthrough_success_handler_obj=passthrough_success_handler_obj,
+        url_route=url_route,
+    ):
+        # Assert each chunk is bytes
+        assert isinstance(chunk, bytes), f"Chunk should be bytes, got {type(chunk)}"
+        # Assert no decoding/encoding occurred (chunk should be exactly as input)
+        assert (
+            chunk in raw_chunks
+        ), f"Chunk {chunk} was modified during processing. For pass throughs streaming, chunks should be raw bytes"
+        received_chunks.append(chunk)
+
+    # Assert all chunks were processed
+    assert len(received_chunks) == len(raw_chunks), "Not all chunks were processed"
+
+    # collected chunks all together
+    assert b"".join(received_chunks) == b"".join(
+        raw_chunks
+    ), "Collected chunks do not match raw chunks"
+
+
+def test_convert_raw_bytes_to_str_lines():
+    """
+    Test that the _convert_raw_bytes_to_str_lines method correctly converts raw bytes to a list of strings
+    """
+    # Test case 1: Single chunk
+    raw_bytes = [b'data: {"content": "Hello"}\n']
+    result = PassThroughStreamingHandler._convert_raw_bytes_to_str_lines(raw_bytes)
+    assert result == ['data: {"content": "Hello"}']
+
+    # Test case 2: Multiple chunks
+    raw_bytes = [b'data: {"content": "Hello"}\n', b'data: {"content": "World"}\n']
+    result = PassThroughStreamingHandler._convert_raw_bytes_to_str_lines(raw_bytes)
+    assert result == ['data: {"content": "Hello"}', 'data: {"content": "World"}']
+
+    # Test case 3: Empty input
+    raw_bytes = []
+    result = PassThroughStreamingHandler._convert_raw_bytes_to_str_lines(raw_bytes)
+    assert result == []
+
+    # Test case 4: Chunks with empty lines
+    raw_bytes = [b'data: {"content": "Hello"}\n\n', b'\ndata: {"content": "World"}\n']
+    result = PassThroughStreamingHandler._convert_raw_bytes_to_str_lines(raw_bytes)
+    assert result == ['data: {"content": "Hello"}', 'data: {"content": "World"}']
--- a/tests/pass_through_unit_tests/test_unit_test_vertex_pass_through.py
+++ b/tests/pass_through_unit_tests/test_unit_test_vertex_pass_through.py
@ -0,0 +1,84 @@
+import json
+import os
+import sys
+from datetime import datetime
+from unittest.mock import AsyncMock, Mock, patch
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the directory two levels above the current working directory to the system path
+
+
+import httpx
+import pytest
+import litellm
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+
+
+from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import (
+    get_litellm_virtual_key,
+    vertex_proxy_route,
+)
+
+
+@pytest.mark.asyncio
+async def test_get_litellm_virtual_key():
+    """
+    Test that the get_litellm_virtual_key function correctly handles the API key authentication
+    """
+    # Test with x-litellm-api-key
+    mock_request = Mock()
+    mock_request.headers = {"x-litellm-api-key": "test-key-123"}
+    result = get_litellm_virtual_key(mock_request)
+    assert result == "Bearer test-key-123"
+
+    # Test with Authorization header
+    mock_request.headers = {"Authorization": "Bearer auth-key-456"}
+    result = get_litellm_virtual_key(mock_request)
+    assert result == "Bearer auth-key-456"
+
+    # Test with both headers (x-litellm-api-key should take precedence)
+    mock_request.headers = {
+        "x-litellm-api-key": "test-key-123",
+        "Authorization": "Bearer auth-key-456",
+    }
+    result = get_litellm_virtual_key(mock_request)
+    assert result == "Bearer test-key-123"
+
+
+@pytest.mark.asyncio
+async def test_vertex_proxy_route_api_key_auth():
+    """
+    Critical
+
+    This is how Vertex AI JS SDK will Auth to Litellm Proxy
+    """
+    # Mock dependencies
+    mock_request = Mock()
+    mock_request.headers = {"x-litellm-api-key": "test-key-123"}
+    mock_request.method = "POST"
+    mock_response = Mock()
+
+    with patch(
+        "litellm.proxy.vertex_ai_endpoints.vertex_endpoints.user_api_key_auth"
+    ) as mock_auth:
+        mock_auth.return_value = {"api_key": "test-key-123"}
+
+        with patch(
+            "litellm.proxy.vertex_ai_endpoints.vertex_endpoints.create_pass_through_route"
+        ) as mock_pass_through:
+            mock_pass_through.return_value = AsyncMock(
+                return_value={"status": "success"}
+            )
+
+            # Call the function
+            result = await vertex_proxy_route(
+                endpoint="v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-1.5-pro:generateContent",
+                request=mock_request,
+                fastapi_response=mock_response,
+            )
+
+            # Verify user_api_key_auth was called with the correct Bearer token
+            mock_auth.assert_called_once()
+            call_args = mock_auth.call_args[1]
+            assert call_args["api_key"] == "Bearer test-key-123"