diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 1d13b973f..c64c84536 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -43,7 +43,7 @@ def test_completion_hf_api():
     try:
         user_message = "write some code to find the sum of two numbers"
         messages = [{ "content": user_message,"role": "user"}]
-        response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True)
+        response = completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")
         # Add any assertions here to check the response
         print(response)
     except Exception as e:
@@ -141,7 +141,7 @@ def test_completion_openai_with_functions():

 def test_completion_azure():
     try:
-        response = completion(model="gpt-3.5-turbo", deployment_id="chatgpt-test", messages=messages, azure=True)
+        response = completion(model="gpt-3.5-turbo", deployment_id="chatgpt-test", messages=messages, custom_llm_provider="azure")
         # Add any assertions here to check the response
         print(response)
     except Exception as e:
@@ -162,7 +162,7 @@ def test_completion_replicate_llama_stream():
 def test_completion_replicate_stability_stream():
     model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
     try:
-        response = completion(model=model_name, messages=messages, stream=True, replicate=True)
+        response = completion(model=model_name, messages=messages, stream=True, custom_llm_provider="replicate")
         # Add any assertions here to check the response
         for chunk in response:
             print(chunk['choices'][0]['delta'])
@@ -170,27 +170,10 @@ def test_completion_replicate_stability_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-
-# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
-# [TODO] improve our try-except block to handle for these
-# def test_completion_replicate_llama():
-#     model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
-#     try:
-#         response = completion(model=model_name, messages=messages, max_tokens=500)
-#         # Add any assertions here to check the response
-#         print(response)
-#     except Exception as e:
-#         print(f"in replicate llama, got error {e}")
-#         pass
-#         if e == "FunctionTimedOut":
-#             pass
-#         else:
-#             pytest.fail(f"Error occurred: {e}")
-
 def test_completion_replicate_stability():
     model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
     try:
-        response = completion(model=model_name, messages=messages, replicate=True)
+        response = completion(model=model_name, messages=messages, custom_llm_provider="replicate")
         # Add any assertions here to check the response
         for result in response:
             print(result)
@@ -202,7 +185,7 @@ def test_completion_replicate_stability():
 def test_completion_together_ai():
     model_name = "togethercomputer/llama-2-70b-chat"
     try:
-        response = completion(model=model_name, messages=messages, together_ai=True)
+        response = completion(model=model_name, messages=messages, custom_llm_provider="together_ai")
         # Add any assertions here to check the response
         print(response)
     except Exception as e:
@@ -211,7 +194,7 @@ def test_completion_together_ai():
 def test_completion_together_ai_stream():
     model_name = "togethercomputer/llama-2-70b-chat"
     try:
-        response = completion(model=model_name, messages=messages, together_ai=True, stream=True)
+        response = completion(model=model_name, messages=messages, custom_llm_provider="together_ai", stream=True)
         # Add any assertions here to check the response
         print(response)
         for chunk in response:
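The common thread of the patch is replacing per-provider boolean flags (hugging_face=True, azure=True, replicate=True, together_ai=True) with a single custom_llm_provider string argument. A minimal usage sketch of that calling convention, outside pytest, is shown below; it assumes `completion` is importable from litellm and that the relevant provider API key is set in the environment, and the model name is purely illustrative.

# Illustrative sketch (not part of the patch above): routing a request by
# provider name via custom_llm_provider instead of the old boolean flags.
# Assumes `from litellm import completion` works and TOGETHERAI_API_KEY (or the
# equivalent provider credential) is already configured in the environment.
from litellm import completion

messages = [{"content": "write some code to find the sum of two numbers", "role": "user"}]

# Select Together AI by name rather than with together_ai=True
response = completion(
    model="togethercomputer/llama-2-70b-chat",
    messages=messages,
    custom_llm_provider="together_ai",
)
print(response)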