use helper for _handle_logging_vertex_collected_chunks

2024-11-21 17:57:16 -08:00 · 2024-11-21 17:57:16 -08:00 · 088532082e
commit 088532082e
parent fe5f57b86c
2 changed files with 88 additions and 1 deletions
--- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py
+++ b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py
@ -11,6 +11,9 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.litellm_core_utils.litellm_logging import (
    get_standard_logging_object_payload,
 )
+from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
+    ModelResponseIterator as VertexModelResponseIterator,
+)

 if TYPE_CHECKING:
    from ..success_handler import PassThroughEndpointLogging
@ -111,6 +114,78 @@ class VertexPassthroughLoggingHandler:
                **kwargs,
            )

+    @staticmethod
+    async def _handle_logging_vertex_collected_chunks(
+        litellm_logging_obj: LiteLLMLoggingObj,
+        passthrough_success_handler_obj: PassThroughEndpointLogging,
+        url_route: str,
+        request_body: dict,
+        endpoint_type: EndpointType,
+        start_time: datetime,
+        all_chunks: List[str],
+        end_time: datetime,
+    ):
+        """
+        Takes raw chunks from Vertex passthrough endpoint and logs them in litellm callbacks
+
+        - Extracts the model name from `url_route`
+        - Builds complete response from chunks
+        - Passes the complete response to `litellm_logging_obj.async_success_handler`
+
+        If a complete response cannot be built (the builder returns None), an
+        error is logged and the success handler is NOT called.
+
+        NOTE(review): this method does not itself create a standard logging
+        object; presumably that happens inside `async_success_handler` - confirm.
+
+        Args:
+            litellm_logging_obj: logging object whose `async_success_handler` is awaited.
+            passthrough_success_handler_obj: accepted but not read by this method's body.
+            url_route: request URL; the model name is extracted from it.
+            request_body: accepted but not read by this method's body.
+            endpoint_type: accepted but not read by this method's body.
+            start_time: forwarded unchanged to the success handler.
+            all_chunks: raw streamed chunks collected from the passthrough response.
+            end_time: forwarded unchanged to the success handler.
+
+        Returns:
+            None in every path.
+        """
+        # Empty dict: no extra keyword arguments are forwarded to the success handler.
+        kwargs = {}
+        model = VertexPassthroughLoggingHandler.extract_model_from_url(url_route)
+        # Re-assemble the individual stream chunks into one response object.
+        complete_streaming_response = (
+            VertexPassthroughLoggingHandler._build_complete_streaming_response(
+                all_chunks=all_chunks,
+                litellm_logging_obj=litellm_logging_obj,
+                model=model,
+            )
+        )
+
+        # Without a complete response there is nothing to log; report and bail out.
+        if complete_streaming_response is None:
+            verbose_proxy_logger.error(
+                "Unable to build complete streaming response for Vertex passthrough endpoint, not logging..."
+            )
+            return
+        # cache_hit is hard-coded to False for this passthrough logging path.
+        await litellm_logging_obj.async_success_handler(
+            result=complete_streaming_response,
+            start_time=start_time,
+            end_time=end_time,
+            cache_hit=False,
+            **kwargs,
+        )
+
+    @staticmethod
+    def _build_complete_streaming_response(
+        all_chunks: List[str],
+        litellm_logging_obj: LiteLLMLoggingObj,
+        model: str,
+    ) -> Optional[Union[litellm.ModelResponse, litellm.TextCompletionResponse]]:
+        """
+        Combine raw Vertex stream chunks into a single complete response.
+
+        Each raw chunk is parsed with the Vertex iterator's
+        `_common_chunk_parsing_logic`, passed through
+        `CustomStreamWrapper.chunk_creator`, and the non-None results are
+        handed to `litellm.stream_chunk_builder`.
+
+        Args:
+            all_chunks: raw chunks collected from the passthrough stream
+                (presumably JSON-encoded Vertex chunks - TODO confirm the
+                format `_common_chunk_parsing_logic` expects).
+            litellm_logging_obj: logging object given to the stream wrapper.
+            model: model name given to the stream wrapper.
+
+        Returns:
+            Whatever `litellm.stream_chunk_builder` returns; the annotation
+            allows None, which the caller treats as "could not build".
+        """
+        # No live stream is attached (streaming_response=None): the iterator is
+        # only used here for its chunk-parsing method and as the wrapper's
+        # completion_stream argument.
+        vertex_iterator = VertexModelResponseIterator(
+            streaming_response=None,
+            sync_stream=False,
+        )
+        litellm_custom_stream_wrapper = litellm.CustomStreamWrapper(
+            completion_stream=vertex_iterator,
+            model=model,
+            logging_obj=litellm_logging_obj,
+            custom_llm_provider="vertex_ai",
+        )
+        all_openai_chunks = []
+        for chunk in all_chunks:
+            # raw chunk -> generic chunk -> litellm chunk
+            generic_chunk = vertex_iterator._common_chunk_parsing_logic(chunk)
+            litellm_chunk = litellm_custom_stream_wrapper.chunk_creator(
+                chunk=generic_chunk
+            )
+            # chunk_creator may return None; such chunks are skipped.
+            if litellm_chunk is not None:
+                all_openai_chunks.append(litellm_chunk)
+
+        # Merge the per-chunk objects into one complete response.
+        complete_streaming_response = litellm.stream_chunk_builder(
+            chunks=all_openai_chunks
+        )
+
+        return complete_streaming_response
+
    @staticmethod
    def extract_model_from_url(url: str) -> str:
        pattern = r"/models/([^:]+)"
--- a/litellm/proxy/pass_through_endpoints/streaming_handler.py
+++ b/litellm/proxy/pass_through_endpoints/streaming_handler.py
@ -20,6 +20,9 @@ from litellm.types.utils import GenericStreamingChunk
 from .llm_provider_handlers.anthropic_passthrough_logging_handler import (
    AnthropicPassthroughLoggingHandler,
 )
+from .llm_provider_handlers.vertex_passthrough_logging_handler import (
+    VertexPassthroughLoggingHandler,
+)
 from .success_handler import PassThroughEndpointLogging
 from .types import EndpointType

@ -100,7 +103,16 @@ async def _route_streaming_logging_to_handler(
            end_time=end_time,
        )
    elif endpoint_type == EndpointType.VERTEX_AI:
-        pass
+        await VertexPassthroughLoggingHandler._handle_logging_vertex_collected_chunks(
+            litellm_logging_obj=litellm_logging_obj,
+            passthrough_success_handler_obj=passthrough_success_handler_obj,
+            url_route=url_route,
+            request_body=request_body,
+            endpoint_type=endpoint_type,
+            start_time=start_time,
+            all_chunks=all_chunks,
+            end_time=end_time,
+        )
    elif endpoint_type == EndpointType.GENERIC:
        # No logging is supported for generic streaming endpoints
        pass