add optional params for llama-2

ishaan-jaff 2023-09-04 11:41:18 -07:00
parent 8c51824bfa
commit 44f44ad5a3
3 changed files with 29 additions and 12 deletions


@@ -58,8 +58,7 @@ def completion(
             prompt += f"{message['content']}"
     data = {
         "inputs": prompt,
-        # "instruction": prompt, # some baseten models require the prompt to be passed in via the 'instruction' kwarg
-        **optional_params,
+        "parameters": optional_params
     }
     ## LOGGING
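With this change the SageMaker request body nests the generation settings under "parameters" instead of spreading them at the top level of the payload, which matches the input schema that JumpStart Llama-2 text-generation endpoints expect. Below is a minimal sketch of a request with that shape, sent directly through a boto3 sagemaker-runtime client; the endpoint name and prompt are placeholders, and this is not the code in this commit.

import json
import boto3

# Hypothetical endpoint name; substitute your deployed JumpStart endpoint.
ENDPOINT_NAME = "jumpstart-dft-meta-textgeneration-llama-2-7b"

client = boto3.client("sagemaker-runtime")

payload = {
    "inputs": "What is the weather like in Boston?",
    # After this commit, optional params travel under "parameters".
    "parameters": {"max_new_tokens": 80, "temperature": 0.2, "top_p": 0.9},
}

response = client.invoke_endpoint(
    EndpointName=ENDPOINT_NAME,
    ContentType="application/json",
    Body=json.dumps(payload),
    # JumpStart Llama-2 endpoints require accepting the model EULA.
    CustomAttributes="accept_eula=true",
)
print(json.loads(response["Body"].read()))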


@@ -392,16 +392,19 @@ def test_completion_together_ai():
         pytest.fail(f"Error occurred: {e}")
 
-# def test_completion_sagemaker():
-#     try:
-#         response = completion(
-#             model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
-#             messages=messages
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+def test_completion_sagemaker():
+    try:
+        response = completion(
+            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
+            messages=messages,
+            temperature=0.2,
+            max_tokens=80,
+        )
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
 # def test_vertex_ai():
 #     model_name = "chat-bison"
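The re-enabled test also shows the caller-facing usage this commit targets: pass temperature and max_tokens to litellm.completion and they are remapped to SageMaker's parameter names for llama-2 models. A hedged usage sketch outside the test suite, assuming litellm's usual OpenAI-style response shape:

from litellm import completion

messages = [{"role": "user", "content": "Write a haiku about GPUs."}]

# temperature / max_tokens are translated to SageMaker's
# "temperature" / "max_new_tokens" by get_optional_params.
response = completion(
    model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
    messages=messages,
    temperature=0.2,
    max_tokens=80,
)
# Assumed OpenAI-compatible response shape.
print(response["choices"][0]["message"]["content"])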


@@ -781,6 +781,21 @@ def get_optional_params(  # use the openai defaults
         if presence_penalty != 0:
             optional_params["repetition_penalty"] = presence_penalty
         optional_params["details"] = True
+    elif custom_llm_provider == "sagemaker":
+        if "llama-2" in model:
+            # llama-2 models on sagemaker support the following args
+            """
+            max_new_tokens: Model generates text until the output length (excluding the input context length) reaches max_new_tokens. If specified, it must be a positive integer.
+            temperature: Controls the randomness in the output. Higher temperature results in output sequence with low-probability words and lower temperature results in output sequence with high-probability words. If temperature -> 0, it results in greedy decoding. If specified, it must be a positive float.
+            top_p: In each step of text generation, sample from the smallest possible set of words with cumulative probability top_p. If specified, it must be a float between 0 and 1.
+            return_full_text: If True, input text will be part of the output generated text. If specified, it must be boolean. The default value for it is False.
+            """
+            if max_tokens != float("inf"):
+                optional_params["max_new_tokens"] = max_tokens
+            if temperature != 1:
+                optional_params["temperature"] = temperature
+            if top_p != 1:
+                optional_params["top_p"] = top_p
    elif model in litellm.aleph_alpha_models:
        if max_tokens != float("inf"):
            optional_params["maximum_tokens"] = max_tokens