fix(factory.py): add replicate meta llama prompt templating support

Krrish Dholakia 2024-04-25 08:24:28 -07:00
parent 92f21cba30
commit 4f46b4c397
4 changed files with 26 additions and 4 deletions


@@ -1355,7 +1355,9 @@ def prompt_factory(
     try:
         if "meta-llama/llama-2" in model and "chat" in model:
             return llama_2_chat_pt(messages=messages)
-        elif "meta-llama/llama-3" in model and "instruct" in model:
+        elif (
+            "meta-llama/llama-3" in model or "meta-llama-3" in model
+        ) and "instruct" in model:
             return hf_chat_template(
                 model=model,
                 messages=messages,

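The broadened condition is what lets Replicate's Llama 3 slug pick up the chat template: "meta/meta-llama-3-8b-instruct" contains "meta-llama-3" but not the Hugging Face-style "meta-llama/llama-3" prefix. A minimal standalone sketch of the check (the helper name is invented for illustration and is not part of this commit):

    def routes_to_llama3_chat_template(model: str) -> bool:
        # same boolean logic as the new elif branch above
        return (
            "meta-llama/llama-3" in model or "meta-llama-3" in model
        ) and "instruct" in model

    # Hugging Face-style id: already matched before this change
    assert routes_to_llama3_chat_template("meta-llama/llama-3-8b-instruct")
    # Replicate-style slug: only matches because of the new "meta-llama-3" clause
    assert routes_to_llama3_chat_template("meta/meta-llama-3-8b-instruct")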

@@ -307,9 +307,7 @@ def completion(
         result, logs = handle_prediction_response(
             prediction_url, api_key, print_verbose
         )
-        model_response["ended"] = (
-            time.time()
-        )  # for pricing this must remain right after calling api
         ## LOGGING
         logging_obj.post_call(
             input=prompt,


@@ -31,6 +31,9 @@ model_list:
 - litellm_params:
     model: gpt-3.5-turbo
   model_name: gpt-3.5-turbo
+- model_name: llama-3
+  litellm_params:
+    model: replicate/meta/meta-llama-3-8b-instruct
 router_settings:
   allowed_fails: 3
   context_window_fallbacks: null

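With the new model_list entry, an OpenAI-compatible request for "llama-3" made against the proxy resolves to replicate/meta/meta-llama-3-8b-instruct. A rough usage sketch, assuming the proxy is running locally with this config (the base_url, port, and key below are placeholders, not part of this commit):

    import openai

    client = openai.OpenAI(api_key="sk-anything", base_url="http://0.0.0.0:4000")
    response = client.chat.completions.create(
        model="llama-3",  # model_name alias from the config above
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)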

@@ -1767,6 +1767,25 @@ def test_completion_azure_deployment_id():
 # test_completion_anthropic_openai_proxy()
 
 
+def test_completion_replicate_llama3():
+    litellm.set_verbose = True
+    model_name = "replicate/meta/meta-llama-3-8b-instruct"
+    try:
+        response = completion(
+            model=model_name,
+            messages=messages,
+        )
+        print(response)
+        # Add any assertions here to check the response
+        response_str = response["choices"][0]["message"]["content"]
+        print("RESPONSE STRING\n", response_str)
+        if type(response_str) != str:
+            pytest.fail(f"Error occurred: {e}")
+        raise Exception("it worked!")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.skip(reason="replicate endpoints take +2 mins just for this request")
 def test_completion_replicate_vicuna():
     print("TESTING REPLICATE")