fix(huggingface_restapi.py): fix huggingface streaming error raising

2024-03-04 09:32:27 -08:00 · 2024-03-04 09:32:27 -08:00 · 873ddde924
commit 873ddde924
parent 766e8cba84
3 changed files with 65 additions and 8 deletions
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -634,15 +634,60 @@ class Huggingface(BaseLLM):
                        status_code=r.status_code,
                        message=str(text),
                    )
-                streamwrapper = CustomStreamWrapper(
-                    completion_stream=r.aiter_lines(),
+                """
+                Check first chunk for error message. 
+                If error message, raise error. 
+                If not - add back to stream
+                """
+                # Async iterator over the lines in the response body
+                response_iterator = r.aiter_lines()
+
+                # Attempt to get the first line/chunk from the response
+                try:
+                    first_chunk = await response_iterator.__anext__()
+                except StopAsyncIteration:
+                    # Handle the case where there are no lines to read (empty response)
+                    first_chunk = ""
+
+                # Check the first chunk for an error message
+                if (
+                    "error" in first_chunk.lower()
+                ):  # Adjust this condition based on how error messages are structured
+                    raise HuggingfaceError(
+                        status_code=400,
+                        message=first_chunk,
+                    )
+
+                return self.async_streaming_generator(
+                    first_chunk=first_chunk,
+                    response_iterator=response_iterator,
                    model=model,
-                    custom_llm_provider="huggingface",
                    logging_obj=logging_obj,
                )

-                async for transformed_chunk in streamwrapper:
-                    yield transformed_chunk
+    async def async_streaming_generator(
+        self, first_chunk, response_iterator, model, logging_obj
+    ):
+        # Rebuild the full stream: emit the separately supplied first_chunk, then the remaining items
+        async def custom_stream_with_first_chunk():
+            yield first_chunk  # emitted first, ahead of anything still in response_iterator
+            async for (
+                chunk
+            ) in response_iterator:  # then pass through every remaining chunk unchanged
+                yield chunk
+
+        # Hand the recombined stream to CustomStreamWrapper and re-yield whatever it produces
+        completion_stream = custom_stream_with_first_chunk()
+
+        streamwrapper = CustomStreamWrapper(
+            completion_stream=completion_stream,
+            model=model,
+            custom_llm_provider="huggingface",
+            logging_obj=logging_obj,
+        )
+
+        async for transformed_chunk in streamwrapper:
+            yield transformed_chunk

    def embedding(
        self,