added support for bedrock llama models

This commit is contained in:
David Christian 2023-11-13 15:41:21 -08:00
parent d4de55b053
commit fe7e3ff038
4 changed files with 68 additions and 0 deletions

View file

@ -172,6 +172,36 @@ class AnthropicConstants(Enum):
HUMAN_PROMPT = "\n\nHuman: " HUMAN_PROMPT = "\n\nHuman: "
AI_PROMPT = "\n\nAssistant: " AI_PROMPT = "\n\nAssistant: "
class AmazonLlamaConfig():
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1

    Supported Params for the Amazon / Meta Llama models:

    - `max_gen_len` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model

    Values passed to the constructor are stored on the class itself (not on
    the instance), so creating a config changes what `get_config()` returns
    for every later caller.
    """
    max_gen_len: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None

    def __init__(self,
                 maxTokenCount: Optional[int] = None,
                 temperature: Optional[float] = None,
                 topP: Optional[float] = None,
                 *,
                 max_gen_len: Optional[int] = None,
                 top_p: Optional[float] = None) -> None:
        """
        Store every non-None setting as a class-level default.

        `maxTokenCount` and `topP` are legacy (Titan-style) spellings kept so
        existing callers keep working; they are stored under the Llama
        parameter names `max_gen_len` and `top_p`. When both spellings are
        given, the Llama-style keyword wins.
        """
        # Normalise to the key names listed in the class docstring, so that
        # get_config() never emits `maxTokenCount` / `topP`.
        resolved = {
            "max_gen_len": max_gen_len if max_gen_len is not None else maxTokenCount,
            "temperature": temperature,
            "top_p": top_p if top_p is not None else topP,
        }
        for key, value in resolved.items():
            if value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """
        Return the class-level settings that are currently set.

        Dunder entries, functions/methods and None values are filtered out,
        leaving a plain `{param_name: value}` dict.
        """
        return {k: v for k, v in cls.__dict__.items()
                if not k.startswith('__')
                and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
                and v is not None}
def init_bedrock_client( def init_bedrock_client(
region_name = None, region_name = None,
@ -337,6 +367,16 @@ def completion(
"prompt": prompt, "prompt": prompt,
**inference_params **inference_params
}) })
elif provider == "meta":
## LOAD CONFIG
config = litellm.AmazonLlamaConfig.get_config()
for k, v in config.items():
if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
inference_params[k] = v
data = json.dumps({
"prompt": prompt,
**inference_params
})
elif provider == "amazon": # amazon titan elif provider == "amazon": # amazon titan
## LOAD CONFIG ## LOAD CONFIG
config = litellm.AmazonTitanConfig.get_config() config = litellm.AmazonTitanConfig.get_config()
@ -398,6 +438,8 @@ def completion(
model_response["finish_reason"] = response_body["stop_reason"] model_response["finish_reason"] = response_body["stop_reason"]
elif provider == "cohere": elif provider == "cohere":
outputText = response_body["generations"][0]["text"] outputText = response_body["generations"][0]["text"]
elif provider == "meta":
outputText = response_body["generation"]
else: # amazon titan else: # amazon titan
outputText = response_body.get('results')[0].get('outputText') outputText = response_body.get('results')[0].get('outputText')

View file

@ -522,6 +522,13 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"meta.llama2-13b-chat-v1": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000075,
"output_cost_per_token": 0.000001,
"litellm_provider": "bedrock",
"mode": "chat"
},
"together-ai-up-to-3b": { "together-ai-up-to-3b": {
"input_cost_per_token": 0.0000001, "input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000001 "output_cost_per_token": 0.0000001

View file

@ -1875,6 +1875,18 @@ def get_optional_params( # use the openai defaults
optional_params["topP"] = top_p optional_params["topP"] = top_p
if stream: if stream:
optional_params["stream"] = stream optional_params["stream"] = stream
elif "meta" in model: # amazon / meta llms
supported_params = ["max_tokens", "temperature", "top_p", "stream"]
_check_valid_arg(supported_params=supported_params)
# see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
if max_tokens:
optional_params["max_gen_len"] = max_tokens
if temperature:
optional_params["temperature"] = temperature
if top_p:
optional_params["top_p"] = top_p
if stream:
optional_params["stream"] = stream
elif "cohere" in model: # cohere models on bedrock elif "cohere" in model: # cohere models on bedrock
supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"] supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"]
_check_valid_arg(supported_params=supported_params) _check_valid_arg(supported_params=supported_params)

View file

@ -593,6 +593,13 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"meta.llama2-13b-chat-v1": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000075,
"output_cost_per_token": 0.000001,
"litellm_provider": "bedrock",
"mode": "chat"
},
"together-ai-up-to-3b": { "together-ai-up-to-3b": {
"input_cost_per_token": 0.0000001, "input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000001 "output_cost_per_token": 0.0000001