diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index 03b688020..ef4a1a29e 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -39,7 +39,7 @@ def start_prediction(version_id, input_data, api_token, logging_obj):
         response_data = response.json()
         return response_data.get("urls", {}).get("get")
     else:
-        raise ReplicateError(response.status_code, message=response.text)
+        raise ReplicateError(response.status_code, f"Failed to start prediction {response.text}")
 
 # Function to handle prediction response (non-streaming)
 def handle_prediction_response(prediction_url, api_token, print_verbose):
diff --git a/litellm/tests/test_model_alias_map.py b/litellm/tests/test_model_alias_map.py
index 2bdbfdb6f..5045d9948 100644
--- a/litellm/tests/test_model_alias_map.py
+++ b/litellm/tests/test_model_alias_map.py
@@ -23,7 +23,7 @@ try:
         "llama2",
         messages=[{"role": "user", "content": "Hey, how's it going?"}],
         top_p=0.1,
-        temperature=0.1,
+        temperature=0.01,
         num_beams=4,
         max_tokens=60,
     )
diff --git a/litellm/utils.py b/litellm/utils.py
index 8a7d908e8..32434c0bf 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -706,7 +706,7 @@ def get_optional_params(  # use the openai defaults
             optional_params["stream"] = stream
             return optional_params
         if max_tokens != float("inf"):
-            if "vicuna" in model:
+            if "vicuna" in model or "flan" in model:
                 optional_params["max_length"] = max_tokens
             else:
                 optional_params["max_new_tokens"] = max_tokens
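
For context on the `litellm/utils.py` hunk: it extends the max-token mapping in `get_optional_params` so that "flan" models, like "vicuna" models, receive the request's `max_tokens` as `max_length` rather than `max_new_tokens`. Below is a minimal, self-contained sketch of that rule; the helper name `pick_max_tokens_param` is hypothetical and not part of litellm.

```python
# Hypothetical helper illustrating the mapping rule changed in litellm/utils.py.
# This is a sketch for illustration only, not litellm's actual implementation.
def pick_max_tokens_param(model: str, max_tokens: float) -> dict:
    optional_params = {}
    if max_tokens != float("inf"):
        # vicuna-style and (after this patch) flan-style models expect "max_length";
        # other models get "max_new_tokens".
        if "vicuna" in model or "flan" in model:
            optional_params["max_length"] = max_tokens
        else:
            optional_params["max_new_tokens"] = max_tokens
    return optional_params

# Example: a flan model now maps max_tokens to max_length.
assert pick_max_tokens_param("google/flan-t5-xxl", 60) == {"max_length": 60}
assert pick_max_tokens_param("meta-llama/Llama-2-7b-chat-hf", 60) == {"max_new_tokens": 60}
```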