forked from phoenix/litellm-mirror
fix(huggingface_restapi.py): fix huggingface streaming error raising
This commit is contained in:
parent
766e8cba84
commit
873ddde924
3 changed files with 65 additions and 8 deletions
|
@ -634,15 +634,60 @@ class Huggingface(BaseLLM):
|
|||
status_code=r.status_code,
|
||||
message=str(text),
|
||||
)
|
||||
streamwrapper = CustomStreamWrapper(
|
||||
completion_stream=r.aiter_lines(),
|
||||
"""
|
||||
Check first chunk for error message.
|
||||
If error message, raise error.
|
||||
If not - add back to stream
|
||||
"""
|
||||
# Async iterator over the lines in the response body
|
||||
response_iterator = r.aiter_lines()
|
||||
|
||||
# Attempt to get the first line/chunk from the response
|
||||
try:
|
||||
first_chunk = await response_iterator.__anext__()
|
||||
except StopAsyncIteration:
|
||||
# Handle the case where there are no lines to read (empty response)
|
||||
first_chunk = ""
|
||||
|
||||
# Check the first chunk for an error message
|
||||
if (
|
||||
"error" in first_chunk.lower()
|
||||
): # Adjust this condition based on how error messages are structured
|
||||
raise HuggingfaceError(
|
||||
status_code=400,
|
||||
message=first_chunk,
|
||||
)
|
||||
|
||||
return self.async_streaming_generator(
|
||||
first_chunk=first_chunk,
|
||||
response_iterator=response_iterator,
|
||||
model=model,
|
||||
custom_llm_provider="huggingface",
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
|
||||
async for transformed_chunk in streamwrapper:
|
||||
yield transformed_chunk
|
||||
async def async_streaming_generator(
    self,
    first_chunk,
    response_iterator,
    model,
    logging_obj,
    custom_llm_provider="huggingface",
):
    """Stream transformed chunks, starting with an already-consumed first chunk.

    The caller reads the first line of the response to check it for an
    error message before streaming. That line has therefore been removed
    from ``response_iterator``; this generator puts it back in front of the
    remaining lines so that no data is lost, then feeds the combined stream
    through ``CustomStreamWrapper``.

    Args:
        first_chunk: The first line already pulled from the response
            (an empty string when the response had no lines).
        response_iterator: Async iterator over the remaining response lines.
        model: Model name forwarded to ``CustomStreamWrapper``.
        logging_obj: Logging object forwarded to ``CustomStreamWrapper``.
        custom_llm_provider: Provider name forwarded to
            ``CustomStreamWrapper``. Defaults to ``"huggingface"``. Accepted
            as a parameter because the call site passes it as a keyword;
            without it that call fails with ``TypeError``.

    Yields:
        Each chunk produced by ``CustomStreamWrapper``, in order.
    """

    async def custom_stream_with_first_chunk():
        # Replay the chunk the caller consumed, then continue with the rest.
        yield first_chunk
        async for chunk in response_iterator:
            yield chunk

    # NOTE(review): response_iterator must still be readable while this
    # generator is being consumed - confirm the caller keeps the underlying
    # response open for that long.
    streamwrapper = CustomStreamWrapper(
        completion_stream=custom_stream_with_first_chunk(),
        model=model,
        custom_llm_provider=custom_llm_provider,
        logging_obj=logging_obj,
    )

    async for transformed_chunk in streamwrapper:
        yield transformed_chunk
|
||||
|
||||
def embedding(
|
||||
self,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue