diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index 5329e3874d..5dff48d08b 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -108,10 +108,9 @@ def completion(
     version_id = model_to_version_id(model)
     input_data = {
         "prompt": prompt,
-        "max_new_tokens": 50,
+        **optional_params
     }
-
     ## LOGGING
     logging_obj.pre_call(
         input=prompt,
@@ -142,6 +141,9 @@ def completion(
     )
     print_verbose(f"raw model_response: {result}")
+
+    if len(result) == 0: # edge case, where result from replicate is empty
+        result = " "

     ## Building RESPONSE OBJECT
     model_response["choices"][0]["message"]["content"] = result
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index a2880916d2..475dc802b6 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -352,10 +352,13 @@ def test_completion_azure_deployment_id():
 # Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
 def test_completion_replicate_llama_2():
+    litellm.set_verbose = True
     model_name = "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf"
     try:
         response = completion(
-            model=model_name, messages=messages, custom_llm_provider="replicate"
+            model=model_name,
+            messages=messages,
+            custom_llm_provider="replicate"
         )
         print(response)
         # Add any assertions here to check the response
diff --git a/litellm/utils.py b/litellm/utils.py
index 39be90337d..dd40cfa63f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -707,6 +707,8 @@ def get_optional_params( # use the openai defaults
         if stream:
             optional_params["stream"] = stream
             return optional_params
+        if max_tokens != float("inf"):
+            optional_params["max_new_tokens"] = max_tokens
     elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
         if stream:
             optional_params["stream_tokens"] = stream
diff --git a/pyproject.toml b/pyproject.toml
index fb8f22707a..09133b3857 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.539"
+version = "0.1.540"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
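
A minimal sketch of what the replicate.py and utils.py changes enable together, assuming the public litellm.completion API shown in the updated test above. The message content and max_tokens value are illustrative, not part of the patch: a caller-supplied max_tokens is now forwarded to Replicate as max_new_tokens via get_optional_params(), instead of being pinned to the old hard-coded 50.

# Illustrative usage only (not part of the patch): exercises the new
# max_tokens -> max_new_tokens mapping added in get_optional_params().
import litellm
from litellm import completion

litellm.set_verbose = True  # same debug flag the updated test turns on

response = completion(
    model="replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    messages=[{"role": "user", "content": "Hello, how are you?"}],  # assumed sample messages
    custom_llm_provider="replicate",
    max_tokens=100,  # previously ignored; now sent to Replicate as max_new_tokens
)
print(response)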