Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)
add optional params for llama-2

commit 44f44ad5a3
parent 8c51824bfa

3 changed files with 29 additions and 12 deletions
@@ -58,8 +58,7 @@ def completion(
             prompt += f"{message['content']}"
     data = {
         "inputs": prompt,
-        # "instruction": prompt, # some baseten models require the prompt to be passed in via the 'instruction' kwarg
-        "parameters": optional_params
+        **optional_params,
     }

     ## LOGGING
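For context on the payload change above: provider-specific options are now spread into the top level of the request body instead of being nested under a "parameters" key. A minimal, self-contained sketch of the shape difference (the prompt and option values below are made up for illustration):

prompt = "Hello, how are you?"
optional_params = {"max_new_tokens": 80, "temperature": 0.2}

# old payload shape: options nested under "parameters"
data_old = {"inputs": prompt, "parameters": optional_params}

# new payload shape: options merged into the top-level dict
data_new = {"inputs": prompt, **optional_params}

print(data_old)  # {'inputs': ..., 'parameters': {'max_new_tokens': 80, 'temperature': 0.2}}
print(data_new)  # {'inputs': ..., 'max_new_tokens': 80, 'temperature': 0.2}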
@@ -392,16 +392,19 @@ def test_completion_together_ai():
         pytest.fail(f"Error occurred: {e}")


-# def test_completion_sagemaker():
-#     try:
-#         response = completion(
-#             model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
-#             messages=messages
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+def test_completion_sagemaker():
+    try:
+        response = completion(
+            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
+            messages=messages,
+            temperature=0.2,
+            max_tokens=80,
+        )
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")


 # def test_vertex_ai():
 #     model_name = "chat-bison"
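Outside of pytest, the re-enabled test corresponds to a call like the sketch below. It assumes AWS credentials for the SageMaker endpoint are already configured in the environment; the message content is illustrative and not taken from the test file.

from litellm import completion

# assumes boto3-compatible AWS credentials are already available in the environment
messages = [{"role": "user", "content": "Hey, how's it going?"}]

response = completion(
    model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
    messages=messages,
    temperature=0.2,  # forwarded to the llama-2 endpoint
    max_tokens=80,    # mapped to llama-2's max_new_tokens
)
print(response)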
@@ -781,6 +781,21 @@ def get_optional_params( # use the openai defaults
         if presence_penalty != 0:
             optional_params["repetition_penalty"] = presence_penalty
         optional_params["details"] = True
+    elif custom_llm_provider == "sagemaker":
+        if "llama-2" in model:
+            # llama-2 models on sagemaker support the following args
+            """
+            max_new_tokens: Model generates text until the output length (excluding the input context length) reaches max_new_tokens. If specified, it must be a positive integer.
+            temperature: Controls the randomness in the output. Higher temperature results in output sequence with low-probability words and lower temperature results in output sequence with high-probability words. If temperature -> 0, it results in greedy decoding. If specified, it must be a positive float.
+            top_p: In each step of text generation, sample from the smallest possible set of words with cumulative probability top_p. If specified, it must be a float between 0 and 1.
+            return_full_text: If True, input text will be part of the output generated text. If specified, it must be boolean. The default value for it is False.
+            """
+            if max_tokens != float("inf"):
+                optional_params["max_new_tokens"] = max_tokens
+            if temperature != 1:
+                optional_params["temperature"] = temperature
+            if top_p != 1:
+                optional_params["top_p"] = top_p
     elif model in litellm.aleph_alpha_models:
         if max_tokens != float("inf"):
             optional_params["maximum_tokens"] = max_tokens
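To make the new mapping concrete, here is an illustrative standalone re-implementation of just the sagemaker/llama-2 branch added above; the helper name is hypothetical and not part of litellm.

def map_llama2_sagemaker_params(max_tokens=float("inf"), temperature=1, top_p=1):
    # Only forward non-default OpenAI-style args, renaming max_tokens -> max_new_tokens.
    optional_params = {}
    if max_tokens != float("inf"):
        optional_params["max_new_tokens"] = max_tokens
    if temperature != 1:
        optional_params["temperature"] = temperature
    if top_p != 1:
        optional_params["top_p"] = top_p
    return optional_params

print(map_llama2_sagemaker_params(max_tokens=80, temperature=0.2))
# -> {'max_new_tokens': 80, 'temperature': 0.2}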