add optional params for llama-2

ishaan-jaff 2023-09-04 11:41:18 -07:00
parent 8c51824bfa
commit 44f44ad5a3
3 changed files with 29 additions and 12 deletions


@@ -58,8 +58,7 @@ def completion(
             prompt += f"{message['content']}"
     data = {
         "inputs": prompt,
-        # "instruction": prompt, # some baseten models require the prompt to be passed in via the 'instruction' kwarg
-        **optional_params,
+        "parameters": optional_params
     }
     ## LOGGING
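With this change the SageMaker request body nests the generation settings under "parameters" instead of spreading them at the top level of the payload, which matches the input schema that JumpStart Llama-2 text-generation endpoints expect. Below is a minimal sketch of a request with that shape, sent directly through a boto3 sagemaker-runtime client; the endpoint name and prompt are placeholders, and this is not the code in this commit.

import json
import boto3

# Hypothetical endpoint name; substitute your deployed JumpStart endpoint.
ENDPOINT_NAME = "jumpstart-dft-meta-textgeneration-llama-2-7b"

client = boto3.client("sagemaker-runtime")

payload = {
    "inputs": "What is the weather like in Boston?",
    # After this commit, optional params travel under "parameters".
    "parameters": {"max_new_tokens": 80, "temperature": 0.2, "top_p": 0.9},
}

response = client.invoke_endpoint(
    EndpointName=ENDPOINT_NAME,
    ContentType="application/json",
    Body=json.dumps(payload),
    # JumpStart Llama-2 endpoints require accepting the model EULA.
    CustomAttributes="accept_eula=true",
)
print(json.loads(response["Body"].read()))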


@@ -392,16 +392,19 @@ def test_completion_together_ai():
         pytest.fail(f"Error occurred: {e}")
 
-# def test_completion_sagemaker():
-#     try:
-#         response = completion(
-#             model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
-#             messages=messages
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+def test_completion_sagemaker():
+    try:
+        response = completion(
+            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
+            messages=messages,
+            temperature=0.2,
+            max_tokens=80,
+        )
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
 # def test_vertex_ai():
 #     model_name = "chat-bison"
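The re-enabled test also shows the caller-facing usage this commit targets: pass temperature and max_tokens to litellm.completion and they are remapped to SageMaker's parameter names for llama-2 models. A hedged usage sketch outside the test suite, assuming litellm's usual OpenAI-style response shape:

from litellm import completion

messages = [{"role": "user", "content": "Write a haiku about GPUs."}]

# temperature / max_tokens are translated to SageMaker's
# "temperature" / "max_new_tokens" by get_optional_params.
response = completion(
    model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
    messages=messages,
    temperature=0.2,
    max_tokens=80,
)
# Assumed OpenAI-compatible response shape.
print(response["choices"][0]["message"]["content"])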


@@ -781,6 +781,21 @@ def get_optional_params(  # use the openai defaults
         if presence_penalty != 0:
             optional_params["repetition_penalty"] = presence_penalty
         optional_params["details"] = True
+    elif custom_llm_provider == "sagemaker":
+        if "llama-2" in model:
+            # llama-2 models on sagemaker support the following args
+            """
+            max_new_tokens: Model generates text until the output length (excluding the input context length) reaches max_new_tokens. If specified, it must be a positive integer.
+            temperature: Controls the randomness in the output. Higher temperature results in output sequence with low-probability words and lower temperature results in output sequence with high-probability words. If temperature -> 0, it results in greedy decoding. If specified, it must be a positive float.
+            top_p: In each step of text generation, sample from the smallest possible set of words with cumulative probability top_p. If specified, it must be a float between 0 and 1.
+            return_full_text: If True, input text will be part of the output generated text. If specified, it must be boolean. The default value for it is False.
+            """
+            if max_tokens != float("inf"):
+                optional_params["max_new_tokens"] = max_tokens
+            if temperature != 1:
+                optional_params["temperature"] = temperature
+            if top_p != 1:
+                optional_params["top_p"] = top_p
    elif model in litellm.aleph_alpha_models:
        if max_tokens != float("inf"):
            optional_params["maximum_tokens"] = max_tokens