Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 19:24:27 +00:00
(Refactor) Code Quality improvement - Use Common base handler for cloudflare/ provider (#7127)

* add get_complete_url to base config
* cloudflare - refactor to follow the existing pattern
* migrate cloudflare chat completions to base llm http handler
* fix unused import
* fix fake stream in cloudflare
* fix cloudflare transformation
* fix naming for BaseModelResponseIterator
* add async cloudflare streaming test
* test cloudflare
* add handler.py
* add handler.py in cohere handler.py
This commit is contained in:
parent 28ff38e35d · commit 9c2316b7ec
14 changed files with 391 additions and 268 deletions
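The first bullet, get_complete_url, is what keeps the shared HTTP handler provider-agnostic: each provider's config object answers "which URL serves this model?" and the common handler does the actual request and streaming work. A minimal sketch of that shape, with hypothetical class names and signatures (not litellm's exact API; only the Cloudflare URL layout is taken from Cloudflare's public Workers AI docs):

# Hypothetical sketch of the "common base handler" pattern; names and
# signatures here are assumptions, not litellm's actual classes.
from abc import ABC, abstractmethod
from typing import Optional


class ProviderConfig(ABC):
    @abstractmethod
    def get_complete_url(self, api_base: Optional[str], model: str) -> str:
        """Return the full endpoint URL for a chat completion request."""


class CloudflareLikeConfig(ProviderConfig):
    def __init__(self, account_id: str):
        self.account_id = account_id

    def get_complete_url(self, api_base: Optional[str], model: str) -> str:
        # Workers AI endpoint layout:
        #   https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model}
        base = (
            api_base
            or f"https://api.cloudflare.com/client/v4/accounts/{self.account_id}/ai/run"
        )
        return f"{base}/{model}"


config = CloudflareLikeConfig(account_id="my-account-id")
print(config.get_complete_url(api_base=None, model="@cf/meta/llama-2-7b-chat-int8"))
# -> https://api.cloudflare.com/client/v4/accounts/my-account-id/ai/run/@cf/meta/llama-2-7b-chat-int8

A handler written against ProviderConfig never needs a cloudflare-specific branch, which is the point of the refactor.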
@@ -1,5 +1,5 @@
 import json
-from abc import abstractmethod
+from abc import ABC, abstractmethod
 from typing import List, Optional, Tuple

 import litellm
@@ -12,6 +12,103 @@ from litellm.types.utils import (
 )
+
+
+class BaseModelResponseIterator:
+    def __init__(
+        self, streaming_response, sync_stream: bool, json_mode: Optional[bool] = False
+    ):
+        self.streaming_response = streaming_response
+        self.response_iterator = self.streaming_response
+        self.json_mode = json_mode
+
+    def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
+        return GenericStreamingChunk(
+            text="",
+            is_finished=False,
+            finish_reason="",
+            usage=None,
+            index=0,
+            tool_use=None,
+        )
+
+    # Sync iterator
+    def __iter__(self):
+        return self
+
+    def _handle_string_chunk(self, str_line: str) -> GenericStreamingChunk:
+        # chunk is a str at this point
+        if "[DONE]" in str_line:
+            return GenericStreamingChunk(
+                text="",
+                is_finished=True,
+                finish_reason="stop",
+                usage=None,
+                index=0,
+                tool_use=None,
+            )
+        elif str_line.startswith("data:"):
+            data_json = json.loads(str_line[5:])
+            return self.chunk_parser(chunk=data_json)
+        else:
+            return GenericStreamingChunk(
+                text="",
+                is_finished=False,
+                finish_reason="",
+                usage=None,
+                index=0,
+                tool_use=None,
+            )
+
+    def __next__(self):
+        try:
+            chunk = self.response_iterator.__next__()
+        except StopIteration:
+            raise StopIteration
+        except ValueError as e:
+            raise RuntimeError(f"Error receiving chunk from stream: {e}")
+
+        try:
+            str_line = chunk
+            if isinstance(chunk, bytes):  # Handle binary data
+                str_line = chunk.decode("utf-8")  # Convert bytes to string
+                index = str_line.find("data:")
+                if index != -1:
+                    str_line = str_line[index:]
+            # chunk is a str at this point
+            return self._handle_string_chunk(str_line=str_line)
+        except StopIteration:
+            raise StopIteration
+        except ValueError as e:
+            raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
+
+    # Async iterator
+    def __aiter__(self):
+        self.async_response_iterator = self.streaming_response.__aiter__()
+        return self
+
+    async def __anext__(self):
+        try:
+            chunk = await self.async_response_iterator.__anext__()
+        except StopAsyncIteration:
+            raise StopAsyncIteration
+        except ValueError as e:
+            raise RuntimeError(f"Error receiving chunk from stream: {e}")
+
+        try:
+            str_line = chunk
+            if isinstance(chunk, bytes):  # Handle binary data
+                str_line = chunk.decode("utf-8")  # Convert bytes to string
+                index = str_line.find("data:")
+                if index != -1:
+                    str_line = str_line[index:]
+
+            # chunk is a str at this point
+            return self._handle_string_chunk(str_line=str_line)
+        except StopAsyncIteration:
+            raise StopAsyncIteration
+        except ValueError as e:
+            raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
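With the SSE plumbing (byte decoding, "data:" framing, "[DONE]" handling) concentrated in BaseModelResponseIterator above, a provider only has to override chunk_parser. A hedged sketch of a Cloudflare-style subclass; the {"response": "..."} payload field is an assumption for illustration, not taken from this diff:

# Hypothetical subclass: everything except chunk_parser is inherited.
# Assumes the imports of the file above (json, GenericStreamingChunk).
class CloudflareLikeResponseIterator(BaseModelResponseIterator):
    def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
        # `chunk` is the json.loads() of one "data: {...}" SSE line; the
        # base class already emits the final chunk when it sees "[DONE]".
        return GenericStreamingChunk(
            text=chunk.get("response", ""),
            is_finished=False,
            finish_reason="",
            usage=None,
            index=0,
            tool_use=None,
        )


# Usage sketch: raw SSE lines (bytes) in, parsed chunks out. GenericStreamingChunk
# is treated as a TypedDict here, matching how the base class constructs it.
fake_sse = iter(
    [
        b'data: {"response": "Hel"}\n',
        b'data: {"response": "lo"}\n',
        b"data: [DONE]\n",
    ]
)
for gchunk in CloudflareLikeResponseIterator(fake_sse, sync_stream=True):
    if gchunk["is_finished"]:
        break
    print(gchunk["text"], end="")  # prints "Hello"

Because both __next__ and __anext__ funnel into _handle_string_chunk, one chunk_parser override covers sync and async streaming alike.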
+
+
+class FakeStreamResponseIterator:
+    def __init__(self, model_response, json_mode: Optional[bool] = False):
+        self.model_response = model_response
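The truncated FakeStreamResponseIterator above backs the "fix fake stream in cloudflare" bullet: when an endpoint cannot stream, the handler makes one ordinary request and replays the finished response as a single-chunk stream. Only the first lines of __init__ are visible here, so the following completion of the idea is an assumption based on that signature, not on the rest of the diff:

# Illustrative fake-stream iterator: yield the whole response once, then stop.
# The choices[0].message.content access assumes an OpenAI-style ModelResponse.
class FakeStreamSketch:
    def __init__(self, model_response, json_mode: Optional[bool] = False):
        self.model_response = model_response
        self.json_mode = json_mode
        self._sent = False

    def __iter__(self):
        return self

    def __next__(self) -> GenericStreamingChunk:
        if self._sent:
            raise StopIteration
        self._sent = True
        choice = self.model_response.choices[0]
        return GenericStreamingChunk(
            text=choice.message.content or "",
            is_finished=True,
            finish_reason=choice.finish_reason or "stop",
            usage=None,
            index=0,
            tool_use=None,
        )

Callers can then treat streaming and non-streaming providers uniformly, which is what lets the common handler expose a single code path.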