(feat) use @google-cloud/vertexai js sdk with litellm (#6873)

* stash gemini JS test * add vertex js sdk example * handle vertex pass through separately * test vertex JS sdk * fix vertex_proxy_route * use PassThroughStreamingHandler * fix PassThroughStreamingHandler * use common _create_vertex_response_logging_payload_for_generate_content * test vertex js * add working vertex jest tests * move basic pass through test * use good name for test * test vertex * test_chunk_processor_yields_raw_bytes * unit tests for streaming * test_convert_raw_bytes_to_str_lines * run unit tests 1st * simplify local * docs add usage example for js * use get_litellm_virtual_key * add unit tests for vertex pass through
2025-04-25 18:54:30 +00:00 · 2024-11-22 16:50:10 -08:00 · 2024-11-22 16:50:10 -08:00 · b2b3e40d13
commit b2b3e40d13
parent 5930c42e74
14 changed files with 680 additions and 89 deletions
--- a/litellm/proxy/pass_through_endpoints/streaming_handler.py
+++ b/litellm/proxy/pass_through_endpoints/streaming_handler.py
@@ -27,93 +27,107 @@ from .success_handler import PassThroughEndpointLogging
 from .types import EndpointType


-async def chunk_processor(
-    response: httpx.Response,
-    request_body: Optional[dict],
-    litellm_logging_obj: LiteLLMLoggingObj,
-    endpoint_type: EndpointType,
-    start_time: datetime,
-    passthrough_success_handler_obj: PassThroughEndpointLogging,
-    url_route: str,
-):
-    """
-    - Yields chunks from the response
-    - Collect non-empty chunks for post-processing (logging)
-    """
-    collected_chunks: List[str] = []  # List to store all chunks
-    try:
-        async for chunk in response.aiter_lines():
-            verbose_proxy_logger.debug(f"Processing chunk: {chunk}")
-            if not chunk:
-                continue
+class PassThroughStreamingHandler:

-            # Handle SSE format - pass through the raw SSE format
-            if isinstance(chunk, bytes):
-                chunk = chunk.decode("utf-8")
+    @staticmethod
+    async def chunk_processor(
+        response: httpx.Response,
+        request_body: Optional[dict],
+        litellm_logging_obj: LiteLLMLoggingObj,
+        endpoint_type: EndpointType,
+        start_time: datetime,
+        passthrough_success_handler_obj: PassThroughEndpointLogging,
+        url_route: str,
+    ):
+        """
+        - Yields chunks from the response
+        - Collect non-empty chunks for post-processing (logging)
+        """
+        try:
+            raw_bytes: List[bytes] = []
+            async for chunk in response.aiter_bytes():
+                raw_bytes.append(chunk)
+                yield chunk

-            # Store the chunk for post-processing
-            if chunk.strip():  # Only store non-empty chunks
-                collected_chunks.append(chunk)
-                yield f"{chunk}\n"
+            # After all chunks are processed, handle post-processing
+            end_time = datetime.now()

-        # After all chunks are processed, handle post-processing
-        end_time = datetime.now()
+            await PassThroughStreamingHandler._route_streaming_logging_to_handler(
+                litellm_logging_obj=litellm_logging_obj,
+                passthrough_success_handler_obj=passthrough_success_handler_obj,
+                url_route=url_route,
+                request_body=request_body or {},
+                endpoint_type=endpoint_type,
+                start_time=start_time,
+                raw_bytes=raw_bytes,
+                end_time=end_time,
+            )
+        except Exception as e:
+            verbose_proxy_logger.error(f"Error in chunk_processor: {str(e)}")
+            raise

-        await _route_streaming_logging_to_handler(
-            litellm_logging_obj=litellm_logging_obj,
-            passthrough_success_handler_obj=passthrough_success_handler_obj,
-            url_route=url_route,
-            request_body=request_body or {},
-            endpoint_type=endpoint_type,
-            start_time=start_time,
-            all_chunks=collected_chunks,
-            end_time=end_time,
+    @staticmethod
+    async def _route_streaming_logging_to_handler(
+        litellm_logging_obj: LiteLLMLoggingObj,
+        passthrough_success_handler_obj: PassThroughEndpointLogging,
+        url_route: str,
+        request_body: dict,
+        endpoint_type: EndpointType,
+        start_time: datetime,
+        raw_bytes: List[bytes],
+        end_time: datetime,
+    ):
+        """
+        Route the logging for the collected chunks to the appropriate handler
+
+        Supported endpoint types:
+        - Anthropic
+        - Vertex AI
+        """
+        all_chunks = PassThroughStreamingHandler._convert_raw_bytes_to_str_lines(
+            raw_bytes
        )
+        if endpoint_type == EndpointType.ANTHROPIC:
+            await AnthropicPassthroughLoggingHandler._handle_logging_anthropic_collected_chunks(
+                litellm_logging_obj=litellm_logging_obj,
+                passthrough_success_handler_obj=passthrough_success_handler_obj,
+                url_route=url_route,
+                request_body=request_body,
+                endpoint_type=endpoint_type,
+                start_time=start_time,
+                all_chunks=all_chunks,
+                end_time=end_time,
+            )
+        elif endpoint_type == EndpointType.VERTEX_AI:
+            await VertexPassthroughLoggingHandler._handle_logging_vertex_collected_chunks(
+                litellm_logging_obj=litellm_logging_obj,
+                passthrough_success_handler_obj=passthrough_success_handler_obj,
+                url_route=url_route,
+                request_body=request_body,
+                endpoint_type=endpoint_type,
+                start_time=start_time,
+                all_chunks=all_chunks,
+                end_time=end_time,
+            )
+        elif endpoint_type == EndpointType.GENERIC:
+            # No logging is supported for generic streaming endpoints
+            pass

-    except Exception as e:
-        verbose_proxy_logger.error(f"Error in chunk_processor: {str(e)}")
-        raise
+    @staticmethod
+    def _convert_raw_bytes_to_str_lines(raw_bytes: List[bytes]) -> List[str]:
+        """
+        Converts a list of raw bytes into a list of string lines, similar to aiter_lines()

+        Args:
+            raw_bytes: List of bytes chunks from aiter_bytes()

-async def _route_streaming_logging_to_handler(
-    litellm_logging_obj: LiteLLMLoggingObj,
-    passthrough_success_handler_obj: PassThroughEndpointLogging,
-    url_route: str,
-    request_body: dict,
-    endpoint_type: EndpointType,
-    start_time: datetime,
-    all_chunks: List[str],
-    end_time: datetime,
-):
-    """
-    Route the logging for the collected chunks to the appropriate handler
+        Returns:
+            List of string lines, with each line being a complete data: {} chunk
+        """
+        # Combine all bytes and decode to string
+        combined_str = b"".join(raw_bytes).decode("utf-8")

-    Supported endpoint types:
-    - Anthropic
-    - Vertex AI
-    """
-    if endpoint_type == EndpointType.ANTHROPIC:
-        await AnthropicPassthroughLoggingHandler._handle_logging_anthropic_collected_chunks(
-            litellm_logging_obj=litellm_logging_obj,
-            passthrough_success_handler_obj=passthrough_success_handler_obj,
-            url_route=url_route,
-            request_body=request_body,
-            endpoint_type=endpoint_type,
-            start_time=start_time,
-            all_chunks=all_chunks,
-            end_time=end_time,
-        )
-    elif endpoint_type == EndpointType.VERTEX_AI:
-        await VertexPassthroughLoggingHandler._handle_logging_vertex_collected_chunks(
-            litellm_logging_obj=litellm_logging_obj,
-            passthrough_success_handler_obj=passthrough_success_handler_obj,
-            url_route=url_route,
-            request_body=request_body,
-            endpoint_type=endpoint_type,
-            start_time=start_time,
-            all_chunks=all_chunks,
-            end_time=end_time,
-        )
-    elif endpoint_type == EndpointType.GENERIC:
-        # No logging is supported for generic streaming endpoints
-        pass
+        # Split by newlines and filter out empty lines
+        lines = [line.strip() for line in combined_str.split("\n") if line.strip()]
+
+        return lines