diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index 5329e3874d..5dff48d08b 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -108,10 +108,9 @@ def completion(
     version_id = model_to_version_id(model)
     input_data = {
         "prompt": prompt,
-        "max_new_tokens": 50,
+        **optional_params
     }
-
     ## LOGGING
     logging_obj.pre_call(
         input=prompt,
@@ -142,6 +141,9 @@ def completion(
     )
     print_verbose(f"raw model_response: {result}")
+
+    if len(result) == 0: # edge case, where result from replicate is empty
+        result = " "

     ## Building RESPONSE OBJECT
     model_response["choices"][0]["message"]["content"] = result
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index a2880916d2..475dc802b6 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -352,10 +352,13 @@ def test_completion_azure_deployment_id():
 # Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
 def test_completion_replicate_llama_2():
+    litellm.set_verbose = True
     model_name = "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf"
     try:
         response = completion(
-            model=model_name, messages=messages, custom_llm_provider="replicate"
+            model=model_name,
+            messages=messages,
+            custom_llm_provider="replicate"
         )
         print(response)
         # Add any assertions here to check the response
diff --git a/litellm/utils.py b/litellm/utils.py
index 39be90337d..dd40cfa63f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -707,6 +707,8 @@ def get_optional_params( # use the openai defaults
         if stream:
             optional_params["stream"] = stream
             return optional_params
+        if max_tokens != float("inf"):
+            optional_params["max_new_tokens"] = max_tokens
     elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
         if stream:
             optional_params["stream_tokens"] = stream
diff --git a/pyproject.toml b/pyproject.toml
index fb8f22707a..09133b3857 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.539"
+version = "0.1.540"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
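
A minimal sketch of what the replicate.py and utils.py changes enable together, assuming the public litellm.completion API shown in the updated test above. The message content and max_tokens value are illustrative, not part of the patch: a caller-supplied max_tokens is now forwarded to Replicate as max_new_tokens via get_optional_params(), instead of being pinned to the old hard-coded 50.

# Illustrative usage only (not part of the patch): exercises the new
# max_tokens -> max_new_tokens mapping added in get_optional_params().
import litellm
from litellm import completion

litellm.set_verbose = True  # same debug flag the updated test turns on

response = completion(
    model="replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    messages=[{"role": "user", "content": "Hello, how are you?"}],  # assumed sample messages
    custom_llm_provider="replicate",
    max_tokens=100,  # previously ignored; now sent to Replicate as max_new_tokens
)
print(response)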