Mirror of https://github.com/BerriAI/litellm.git
Commit af60b2ba77: add vicuna translation
Parent: afcd6b28cc
2 changed files with 26 additions and 3 deletions
@@ -352,12 +352,12 @@ def test_completion_azure_deployment_id():
 
 # Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
 
 def test_completion_replicate_llama_2():
     litellm.set_verbose = True
     model_name = "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf"
     try:
         response = completion(
             model=model_name,
             messages=messages,
             max_tokens=20,
             custom_llm_provider="replicate"
         )
         print(response)
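The comment at the top of this hunk flags a real constraint: the Replicate endpoints these tests hit can throw transient CUDA errors, so a red test is not necessarily a litellm bug. One hedged way to make such tests tolerant of upstream flakiness is a small retry wrapper; the sketch below is illustrative only (neither MAX_ATTEMPTS nor call_with_retries exists in litellm):

import time

MAX_ATTEMPTS = 3  # hypothetical knob, not part of litellm

def call_with_retries(fn, *args, **kwargs):
    """Retry a flaky provider call a few times before letting the test fail."""
    last_err = None
    for attempt in range(MAX_ATTEMPTS):
        try:
            return fn(*args, **kwargs)
        except Exception as err:  # e.g. a transient CUDA error surfaced by Replicate
            last_err = err
            time.sleep(2 ** attempt)  # simple exponential backoff between attempts
    raise last_err

# Usage inside a test:
# response = call_with_retries(completion, model=model_name, messages=messages)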
@@ -368,9 +368,29 @@ def test_completion_replicate_llama_2():
         pytest.fail(f"Error occurred: {e}")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
 # test_completion_replicate_llama_2()
+def test_completion_replicate_vicuna():
+    model_name = "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b"
+    try:
+        response = completion(
+            model=model_name,
+            messages=messages,
+            custom_llm_provider="replicate",
+            temperature=0.1,
+            max_tokens=20,
+        )
+        print(response)
+        # Add any assertions here to check the response
+        response_str = response["choices"][0]["message"]["content"]
+        print(response_str)
+        if type(response_str) != str:
+            pytest.fail(f"Error occurred: {e}")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_replicate_vicuna()
 
 def test_completion_replicate_llama_stream():
     model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
     try:
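One nit in the new test: the pytest.fail(f"Error occurred: {e}") inside the if type(response_str) != str: branch references e before any except clause has bound it, so a non-string response raises a NameError (which the outer except then reports instead of the actual problem). A slightly tightened version of the same test, assuming this file's existing imports (pytest, completion) and the module-level messages fixture used by the neighboring tests:

def test_completion_replicate_vicuna():
    model_name = "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            custom_llm_provider="replicate",
            temperature=0.1,
            max_tokens=20,
        )
        # Assert directly on the response shape rather than reusing `e`.
        response_str = response["choices"][0]["message"]["content"]
        assert isinstance(response_str, str), f"unexpected response: {response!r}"
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")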
@@ -706,7 +706,10 @@ def get_optional_params(  # use the openai defaults
         optional_params["stream"] = stream
         return optional_params
     if max_tokens != float("inf"):
-        optional_params["max_new_tokens"] = max_tokens
+        if "vicuna" in model:
+            optional_params["max_length"] = max_tokens
+        else:
+            optional_params["max_new_tokens"] = max_tokens
     if temperature != 1:
         optional_params["temperature"] = temperature
     if top_p != 1:
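The substance of the commit is this last hunk: Replicate's vicuna deployment expects the token cap as max_length, while the other models litellm routes take max_new_tokens, so get_optional_params now branches on the model name. A standalone sketch of just that translation step (the translate_max_tokens helper below is illustrative, not litellm's API):

def translate_max_tokens(model: str, max_tokens: float) -> dict:
    """Map the OpenAI-style max_tokens onto the field the target model expects."""
    optional_params = {}
    if max_tokens != float("inf"):  # float("inf") is the "not set" sentinel
        if "vicuna" in model:
            optional_params["max_length"] = max_tokens  # vicuna's parameter name
        else:
            optional_params["max_new_tokens"] = max_tokens
    return optional_params

# translate_max_tokens("replicate/vicuna-13b:6282...", 20)
#   -> {"max_length": 20}
# translate_max_tokens("replicate/llama-2-70b-chat:2796...", 20)
#   -> {"max_new_tokens": 20}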