diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 9ae6a2d23..a6d1d6438 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -1355,7 +1355,9 @@ def prompt_factory(
     try:
         if "meta-llama/llama-2" in model and "chat" in model:
             return llama_2_chat_pt(messages=messages)
-        elif "meta-llama/llama-3" in model and "instruct" in model:
+        elif (
+            "meta-llama/llama-3" in model or "meta-llama-3" in model
+        ) and "instruct" in model:
             return hf_chat_template(
                 model=model,
                 messages=messages,
diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index 73f375653..c1456bd3f 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -307,9 +307,7 @@ def completion(
         result, logs = handle_prediction_response(
            prediction_url, api_key, print_verbose
        )
-        model_response["ended"] = (
-            time.time()
-        )  # for pricing this must remain right after calling api
+
         ## LOGGING
         logging_obj.post_call(
             input=prompt,
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index b5f49ed66..9372d4ca8 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -31,6 +31,9 @@ model_list:
 - litellm_params:
     model: gpt-3.5-turbo
   model_name: gpt-3.5-turbo
+- model_name: llama-3
+  litellm_params:
+    model: replicate/meta/meta-llama-3-8b-instruct
 router_settings:
   allowed_fails: 3
   context_window_fallbacks: null
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 503211c7b..1f12f75ee 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1767,6 +1767,24 @@ def test_completion_azure_deployment_id():
 # test_completion_anthropic_openai_proxy()
 
 
+def test_completion_replicate_llama3():
+    litellm.set_verbose = True
+    model_name = "replicate/meta/meta-llama-3-8b-instruct"
+    try:
+        response = completion(
+            model=model_name,
+            messages=messages,
+        )
+        print(response)
+        # Add any assertions here to check the response
+        response_str = response["choices"][0]["message"]["content"]
+        print("RESPONSE STRING\n", response_str)
+        if type(response_str) != str:
+            pytest.fail("Response content is not a string")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.skip(reason="replicate endpoints take +2 mins just for this request")
 def test_completion_replicate_vicuna():
     print("TESTING REPLICATE")
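
For context, a minimal usage sketch of the Replicate Llama-3 route registered above. It assumes Replicate credentials are already configured for litellm (credential setup is not part of this change), and the prompt content is only illustrative:

import litellm
from litellm import completion

# Assumes a Replicate API key is already available to litellm
# (e.g. via the environment); key handling is outside this diff.
litellm.set_verbose = True

response = completion(
    model="replicate/meta/meta-llama-3-8b-instruct",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
print(response["choices"][0]["message"]["content"])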