fix(test_streaming.py): handle hf tgi zephyr not loading for streaming issue

Krrish Dholakia 2024-02-15 19:24:02 -08:00
parent 31c8d62ac2
commit eb45df16f1
3 changed files with 27 additions and 12 deletions

View file

@@ -399,9 +399,12 @@ class Huggingface(BaseLLM):
             data = {
                 "inputs": prompt,
                 "parameters": optional_params,
-                "stream": True
-                if "stream" in optional_params and optional_params["stream"] == True
-                else False,
+                "stream": (
+                    True
+                    if "stream" in optional_params
+                    and optional_params["stream"] == True
+                    else False
+                ),
             }
             input_text = prompt
         else:
@@ -430,9 +433,12 @@ class Huggingface(BaseLLM):
             data = {
                 "inputs": prompt,
                 "parameters": inference_params,
-                "stream": True
-                if "stream" in optional_params and optional_params["stream"] == True
-                else False,
+                "stream": (
+                    True
+                    if "stream" in optional_params
+                    and optional_params["stream"] == True
+                    else False
+                ),
             }
             input_text = prompt
             ## LOGGING
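
Note on the two hunks above: this is a formatting-only change. Wrapping the "stream" ternary in parentheses lets it span several lines (Black-style); the computed value is identical. A minimal standalone sketch of the equivalent logic (names are illustrative, not taken from the diff):

    # optional_params is a plain dict of request parameters in this sketch.
    optional_params = {"stream": True}

    stream_flag = (
        True
        if "stream" in optional_params
        and optional_params["stream"] == True
        else False
    )
    # Equivalent, shorter spelling -- shown only for comparison; the diff keeps
    # the explicit conditional in the request payload.
    assert stream_flag == (optional_params.get("stream") == True)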
@@ -561,14 +567,12 @@ class Huggingface(BaseLLM):
         input_text: str,
         model: str,
         optional_params: dict,
-        timeout: float
+        timeout: float,
     ):
         response = None
         try:
             async with httpx.AsyncClient(timeout=timeout) as client:
-                response = await client.post(
-                    url=api_base, json=data, headers=headers
-                )
+                response = await client.post(url=api_base, json=data, headers=headers)
                 response_json = response.json()
                 if response.status_code != 200:
                     raise HuggingfaceError(
@@ -607,7 +611,7 @@ class Huggingface(BaseLLM):
         headers: dict,
         model_response: ModelResponse,
         model: str,
-        timeout: float
+        timeout: float,
     ):
         async with httpx.AsyncClient(timeout=timeout) as client:
             response = client.stream(
@@ -615,9 +619,10 @@ class Huggingface(BaseLLM):
             )
             async with response as r:
                 if r.status_code != 200:
+                    text = await r.aread()
                     raise HuggingfaceError(
                         status_code=r.status_code,
-                        message="An error occurred while streaming",
+                        message=str(text),
                     )
                 streamwrapper = CustomStreamWrapper(
                     completion_stream=r.aiter_lines(),
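
Note on the streaming hunk above: with httpx, a streamed response body is not loaded until it is explicitly read, so `await r.aread()` is needed before the server's error text (for TGI, typically a 503 "model is currently loading" payload) can be surfaced in the exception instead of the old generic message. A minimal sketch of the same pattern, assuming a hypothetical TGI-style endpoint URL:

    import asyncio
    import httpx

    async def stream_or_raise(api_base: str, data: dict, headers: dict) -> None:
        async with httpx.AsyncClient(timeout=600.0) as client:
            async with client.stream("POST", api_base, json=data, headers=headers) as r:
                if r.status_code != 200:
                    text = await r.aread()  # body must be read explicitly on a streamed response
                    raise RuntimeError(f"HTTP {r.status_code}: {text!r}")
                async for line in r.aiter_lines():  # normal streaming path
                    print(line)

    # asyncio.run(stream_or_raise("https://example.com/generate_stream", {"inputs": "hi"}, {}))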

View file

@@ -1031,6 +1031,8 @@ async def test_hf_completion_tgi_stream():
         if complete_response.strip() == "":
             raise Exception("Empty response received")
         print(f"completion_response: {complete_response}")
+    except litellm.ServiceUnavailableError as e:
+        pass
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
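
The test above now treats a 503 from the shared Hugging Face TGI endpoint (zephyr not loaded yet) as a pass rather than a failure, by catching `litellm.ServiceUnavailableError`, which the `exception_type` change below raises for that status code. Rough shape of the guarded test as a paraphrased sketch; the model id and prompt are assumptions, not copied from the diff:

    import pytest
    import litellm

    @pytest.mark.asyncio
    async def test_hf_completion_tgi_stream():
        try:
            response = await litellm.acompletion(
                model="huggingface/HuggingFaceH4/zephyr-7b-beta",  # assumed model id
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                stream=True,
            )
            complete_response = ""
            async for chunk in response:
                complete_response += chunk.choices[0].delta.content or ""
            if complete_response.strip() == "":
                raise Exception("Empty response received")
            print(f"completion_response: {complete_response}")
        except litellm.ServiceUnavailableError:
            pass  # shared TGI endpoint returned 503 (model still loading); skip rather than fail
        except Exception as e:
            pytest.fail(f"Error occurred: {e}")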

View file

@@ -6975,6 +6975,14 @@ def exception_type(
                     model=model,
                     response=original_exception.response,
                 )
+            elif original_exception.status_code == 503:
+                exception_mapping_worked = True
+                raise ServiceUnavailableError(
+                    message=f"HuggingfaceException - {original_exception.message}",
+                    llm_provider="huggingface",
+                    model=model,
+                    response=original_exception.response,
+                )
             else:
                 exception_mapping_worked = True
                 raise APIError(
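
With HTTP 503 mapped to `ServiceUnavailableError` here (previously it fell through to the generic `APIError` branch), callers can handle the "model is still loading" case specifically, which is what the test change above relies on. A hedged usage example; the model id is illustrative:

    import litellm

    try:
        litellm.completion(
            model="huggingface/HuggingFaceH4/zephyr-7b-beta",  # illustrative model id
            messages=[{"role": "user", "content": "Hello"}],
        )
    except litellm.ServiceUnavailableError as e:
        # Raised when the HF endpoint answers 503, e.g. while the model is loading.
        print(f"Endpoint unavailable, retry later: {e}")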