fix(huggingface_restapi.py): fix huggingface streaming error raising

Krrish Dholakia 2024-03-04 09:32:27 -08:00
parent 766e8cba84
commit 873ddde924
3 changed files with 65 additions and 8 deletions


@@ -634,15 +634,60 @@ class Huggingface(BaseLLM):
                         status_code=r.status_code,
                         message=str(text),
                     )
-                streamwrapper = CustomStreamWrapper(
-                    completion_stream=r.aiter_lines(),
"""
Check first chunk for error message.
If error message, raise error.
If not - add back to stream
"""
# Async iterator over the lines in the response body
response_iterator = r.aiter_lines()
# Attempt to get the first line/chunk from the response
try:
first_chunk = await response_iterator.__anext__()
except StopAsyncIteration:
# Handle the case where there are no lines to read (empty response)
first_chunk = ""
# Check the first chunk for an error message
if (
"error" in first_chunk.lower()
): # Adjust this condition based on how error messages are structured
raise HuggingfaceError(
status_code=400,
message=first_chunk,
)
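+
+                # No error detected - hand the stream off to a generator that re-prepends the first chunk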
+                return self.async_streaming_generator(
+                    first_chunk=first_chunk,
+                    response_iterator=response_iterator,
                     model=model,
                     custom_llm_provider="huggingface",
                     logging_obj=logging_obj,
                 )
-                async for transformed_chunk in streamwrapper:
-                    yield transformed_chunk
+
+    async def async_streaming_generator(
+        self, first_chunk, response_iterator, model, custom_llm_provider, logging_obj
+    ):
+        # Create a new async generator that begins with the first_chunk and includes the remaining items
+        async def custom_stream_with_first_chunk():
+            yield first_chunk  # Yield back the first chunk
+            async for (
+                chunk
+            ) in response_iterator:  # Continue yielding the rest of the chunks
+                yield chunk
+
+        # Creating a new completion stream that starts with the first chunk
+        completion_stream = custom_stream_with_first_chunk()
+
+        streamwrapper = CustomStreamWrapper(
+            completion_stream=completion_stream,
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            logging_obj=logging_obj,
+        )
+
+        async for transformed_chunk in streamwrapper:
+            yield transformed_chunk
     def embedding(
         self,
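
For context, here is a minimal, self-contained sketch of the pattern this commit introduces: peek at the first streamed line, raise if it looks like an error payload, and otherwise re-chain the peeked line in front of the remaining iterator. The names StreamError, fake_stream, and peek_and_stream below are hypothetical stand-ins for illustration only; they are not part of litellm or the Hugging Face API.

import asyncio


class StreamError(Exception):
    """Placeholder for a provider error raised from the first chunk."""


async def fake_stream(lines):
    # Stand-in for r.aiter_lines(): yields one response line at a time.
    for line in lines:
        yield line


async def peek_and_stream(response_iterator):
    # Pull the first chunk so it can be inspected before streaming begins.
    try:
        first_chunk = await response_iterator.__anext__()
    except StopAsyncIteration:
        first_chunk = ""  # Empty response body

    # Fail fast if the first chunk reports an error.
    if "error" in first_chunk.lower():
        raise StreamError(first_chunk)

    # Re-chain: yield the peeked chunk first, then the rest of the iterator.
    async def chained():
        yield first_chunk
        async for chunk in response_iterator:
            yield chunk

    return chained()


async def main():
    stream = await peek_and_stream(fake_stream(['{"token": "a"}', '{"token": "b"}']))
    async for chunk in stream:
        print(chunk)


asyncio.run(main())

As in the commit's return-based flow, the peek function is a plain coroutine that returns an async generator, so callers await it once and then iterate the result.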