diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py index 56549cfd4..77dc52aae 100644 --- a/litellm/llms/replicate.py +++ b/litellm/llms/replicate.py @@ -295,7 +295,15 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbos response_data = response.json() status = response_data["status"] if "output" in response_data: - output_string = "".join(response_data["output"]) + try: + output_string = "".join(response_data["output"]) + except Exception as e: + raise ReplicateError( + status_code=422, + message="Unable to parse response. Got={}".format( + response_data["output"] + ), + ) new_output = output_string[len(previous_output) :] print_verbose(f"New chunk: {new_output}") yield {"output": new_output, "status": status} diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index 1f1b253a0..0dd81e3b3 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -1415,7 +1415,6 @@ def test_bedrock_claude_3_streaming(): "gpt-3.5-turbo", "databricks/databricks-dbrx-instruct", # databricks "predibase/llama-3-8b-instruct", # predibase - "replicate/meta/meta-llama-3-8b-instruct", # replicate ], ) @pytest.mark.asyncio