forked from phoenix/litellm-mirror
fix(factory.py): add replicate meta llama prompt templating support
parent 92f21cba30
commit 4f46b4c397
4 changed files with 26 additions and 4 deletions
factory.py (`prompt_factory`):

```diff
@@ -1355,7 +1355,9 @@ def prompt_factory(
     try:
         if "meta-llama/llama-2" in model and "chat" in model:
             return llama_2_chat_pt(messages=messages)
-        elif "meta-llama/llama-3" in model and "instruct" in model:
+        elif (
+            "meta-llama/llama-3" in model or "meta-llama-3" in model
+        ) and "instruct" in model:
             return hf_chat_template(
                 model=model,
                 messages=messages,
```
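Why the widened condition: Replicate publishes Llama 3 under slugs like `meta/meta-llama-3-8b-instruct`, which contain `meta-llama-3` but not the Hugging Face-style `meta-llama/llama-3` prefix, so the old check never routed them to the chat template. A minimal sketch of the predicate (a standalone illustration, not litellm's actual helper):

```python
def is_llama3_instruct(model: str) -> bool:
    """Mirror of the widened routing condition above (illustrative only)."""
    model = model.lower()
    return (
        "meta-llama/llama-3" in model or "meta-llama-3" in model
    ) and "instruct" in model

# Replicate's slug has no "meta-llama/llama-3" prefix; the added
# "meta-llama-3" clause is what catches it.
assert is_llama3_instruct("replicate/meta/meta-llama-3-8b-instruct")
# Base (non-instruct) model: no chat template applied.
assert not is_llama3_instruct("meta/meta-llama-3-8b")
# Llama 2 chat still falls through to the llama_2_chat_pt branch.
assert not is_llama3_instruct("meta-llama/Llama-2-7b-chat-hf")
```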
Replicate handler (`completion`):

```diff
@@ -307,9 +307,7 @@ def completion(
         result, logs = handle_prediction_response(
             prediction_url, api_key, print_verbose
         )
-        model_response["ended"] = (
-            time.time()
-        )  # for pricing this must remain right after calling api
+        model_response["ended"] = time.time()  # for pricing this must remain right after calling api
 
         ## LOGGING
         logging_obj.post_call(
             input=prompt,
```
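The inline comment matters because Replicate pricing is time-based: the `ended` timestamp bounds the billable window, so any logging or parsing done before capturing it would be counted as model runtime. A self-contained sketch of the idea (illustrative names and rate, not litellm's accounting code):

```python
import time

def run_prediction() -> str:
    """Hypothetical stand-in for the Replicate prediction round-trip."""
    time.sleep(0.05)
    return "model output"

started = time.time()
output = run_prediction()
ended = time.time()  # capture immediately, before logging or parsing

# Time-based pricing: elapsed seconds * per-second rate (rate is made up).
print(f"billable: {(ended - started) * 0.0007:.6f} USD for {output!r}")
```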
Proxy config (`model_list`):

```diff
@@ -31,6 +31,9 @@ model_list:
   - litellm_params:
       model: gpt-3.5-turbo
     model_name: gpt-3.5-turbo
+  - model_name: llama-3
+    litellm_params:
+      model: replicate/meta/meta-llama-3-8b-instruct
 router_settings:
   allowed_fails: 3
   context_window_fallbacks: null
```
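With this entry, any request naming the model `llama-3` is routed to Replicate. A usage sketch against a locally running proxy (the base URL, port, and key are assumptions for a default local setup, not values from this commit):

```python
from openai import OpenAI

# Point the standard OpenAI client at the litellm proxy.
client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

resp = client.chat.completions.create(
    model="llama-3",  # model_name from the config; proxies to Replicate
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(resp.choices[0].message.content)
```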
Completion tests:

```diff
@@ -1767,6 +1767,25 @@ def test_completion_azure_deployment_id():
 # test_completion_anthropic_openai_proxy()
 
 
+def test_completion_replicate_llama3():
+    litellm.set_verbose = True
+    model_name = "replicate/meta/meta-llama-3-8b-instruct"
+    try:
+        response = completion(
+            model=model_name,
+            messages=messages,
+        )
+        print(response)
+        # Add any assertions here to check the response
+        response_str = response["choices"][0]["message"]["content"]
+        print("RESPONSE STRING\n", response_str)
+        if type(response_str) != str:
+            pytest.fail(f"Error occurred: {e}")
+        raise Exception("it worked!")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.skip(reason="replicate endpoints take +2 mins just for this request")
 def test_completion_replicate_vicuna():
     print("TESTING REPLICATE")
```
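As committed, the test cannot pass: the `if type(response_str) != str:` branch formats `e` before any exception has been bound (a `NameError`), and the debug `raise Exception("it worked!")` is caught by the `except` clause and converted into `pytest.fail`. A corrected sketch (assuming `litellm`, `completion`, `messages`, and `pytest` from the surrounding test module):

```python
def test_completion_replicate_llama3():
    litellm.set_verbose = True
    try:
        response = completion(
            model="replicate/meta/meta-llama-3-8b-instruct",
            messages=messages,
        )
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    response_str = response["choices"][0]["message"]["content"]
    print("RESPONSE STRING\n", response_str)
    # The success condition is simply "the content is a string".
    assert isinstance(response_str, str)
```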