diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 9ae6a2d23..a6d1d6438 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -1355,7 +1355,9 @@ def prompt_factory(
     try:
         if "meta-llama/llama-2" in model and "chat" in model:
             return llama_2_chat_pt(messages=messages)
-        elif "meta-llama/llama-3" in model and "instruct" in model:
+        elif (
+            "meta-llama/llama-3" in model or "meta-llama-3" in model
+        ) and "instruct" in model:
             return hf_chat_template(
                 model=model,
                 messages=messages,
diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index 73f375653..c1456bd3f 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -307,9 +307,7 @@ def completion(
         result, logs = handle_prediction_response(
            prediction_url, api_key, print_verbose
        )
-        model_response["ended"] = (
-            time.time()
-        )  # for pricing this must remain right after calling api
+
         ## LOGGING
         logging_obj.post_call(
             input=prompt,
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index b5f49ed66..9372d4ca8 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -31,6 +31,9 @@ model_list:
 - litellm_params:
     model: gpt-3.5-turbo
   model_name: gpt-3.5-turbo
+- model_name: llama-3
+  litellm_params:
+    model: replicate/meta/meta-llama-3-8b-instruct
 router_settings:
   allowed_fails: 3
   context_window_fallbacks: null
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 503211c7b..1f12f75ee 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1767,6 +1767,24 @@ def test_completion_azure_deployment_id():
 # test_completion_anthropic_openai_proxy()
 
 
+def test_completion_replicate_llama3():
+    litellm.set_verbose = True
+    model_name = "replicate/meta/meta-llama-3-8b-instruct"
+    try:
+        response = completion(
+            model=model_name,
+            messages=messages,
+        )
+        print(response)
+        # Add any assertions here to check the response
+        response_str = response["choices"][0]["message"]["content"]
+        print("RESPONSE STRING\n", response_str)
+        if type(response_str) != str:
+            pytest.fail("Response content is not a string")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.skip(reason="replicate endpoints take +2 mins just for this request")
 def test_completion_replicate_vicuna():
     print("TESTING REPLICATE")
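
For context, a minimal usage sketch of the Replicate Llama-3 route registered above. It assumes Replicate credentials are already configured for litellm (credential setup is not part of this change), and the prompt content is only illustrative:

import litellm
from litellm import completion

# Assumes a Replicate API key is already available to litellm
# (e.g. via the environment); key handling is outside this diff.
litellm.set_verbose = True

response = completion(
    model="replicate/meta/meta-llama-3-8b-instruct",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
print(response["choices"][0]["message"]["content"])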