diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index 56549cfd4..77dc52aae 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -295,7 +295,15 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbos
             response_data = response.json()
             status = response_data["status"]
             if "output" in response_data:
-                output_string = "".join(response_data["output"])
+                try:
+                    output_string = "".join(response_data["output"])
+                except Exception as e:
+                    raise ReplicateError(
+                        status_code=422,
+                        message="Unable to parse response. Got={}".format(
+                            response_data["output"]
+                        ),
+                    )
                 new_output = output_string[len(previous_output) :]
                 print_verbose(f"New chunk: {new_output}")
                 yield {"output": new_output, "status": status}
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 1f1b253a0..0dd81e3b3 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -1415,7 +1415,6 @@ def test_bedrock_claude_3_streaming():
         "gpt-3.5-turbo",
         "databricks/databricks-dbrx-instruct",  # databricks
         "predibase/llama-3-8b-instruct",  # predibase
-        "replicate/meta/meta-llama-3-8b-instruct",  # replicate
     ],
 )
 @pytest.mark.asyncio