diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index 03b688020..ef4a1a29e 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -39,7 +39,7 @@ def start_prediction(version_id, input_data, api_token, logging_obj):
         response_data = response.json()
         return response_data.get("urls", {}).get("get")
     else:
-        raise ReplicateError(response.status_code, message=response.text)
+        raise ReplicateError(response.status_code, f"Failed to start prediction {response.text}")
 
 # Function to handle prediction response (non-streaming)
 def handle_prediction_response(prediction_url, api_token, print_verbose):
diff --git a/litellm/tests/test_model_alias_map.py b/litellm/tests/test_model_alias_map.py
index 2bdbfdb6f..5045d9948 100644
--- a/litellm/tests/test_model_alias_map.py
+++ b/litellm/tests/test_model_alias_map.py
@@ -23,7 +23,7 @@ try:
         "llama2",
         messages=[{"role": "user", "content": "Hey, how's it going?"}],
         top_p=0.1,
-        temperature=0.1,
+        temperature=0.01,
         num_beams=4,
         max_tokens=60,
     )
diff --git a/litellm/utils.py b/litellm/utils.py
index 8a7d908e8..32434c0bf 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -706,7 +706,7 @@ def get_optional_params(  # use the openai defaults
             optional_params["stream"] = stream
             return optional_params
         if max_tokens != float("inf"):
-            if "vicuna" in model:
+            if "vicuna" in model or "flan" in model:
                 optional_params["max_length"] = max_tokens
             else:
                 optional_params["max_new_tokens"] = max_tokens
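
For context on the `litellm/utils.py` hunk: it extends the max-token mapping in `get_optional_params` so that "flan" models, like "vicuna" models, receive the request's `max_tokens` as `max_length` rather than `max_new_tokens`. Below is a minimal, self-contained sketch of that rule; the helper name `pick_max_tokens_param` is hypothetical and not part of litellm.

```python
# Hypothetical helper illustrating the mapping rule changed in litellm/utils.py.
# This is a sketch for illustration only, not litellm's actual implementation.
def pick_max_tokens_param(model: str, max_tokens: float) -> dict:
    optional_params = {}
    if max_tokens != float("inf"):
        # vicuna-style and (after this patch) flan-style models expect "max_length";
        # other models get "max_new_tokens".
        if "vicuna" in model or "flan" in model:
            optional_params["max_length"] = max_tokens
        else:
            optional_params["max_new_tokens"] = max_tokens
    return optional_params

# Example: a flan model now maps max_tokens to max_length.
assert pick_max_tokens_param("google/flan-t5-xxl", 60) == {"max_length": 60}
assert pick_max_tokens_param("meta-llama/Llama-2-7b-chat-hf", 60) == {"max_new_tokens": 60}
```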