diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py
index 6cee56e86..fc668ab9d 100644
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@@ -172,6 +172,36 @@ class AnthropicConstants(Enum):
     HUMAN_PROMPT = "\n\nHuman: "
     AI_PROMPT = "\n\nAssistant: "
 
+class AmazonLlamaConfig():
+    """
+    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
+
+    Supported Params for the Amazon / Meta Llama models:
+
+    - `max_gen_len` (integer) max tokens,
+    - `temperature` (float) temperature for model,
+    - `top_p` (float) top p for model
+    """
+    max_gen_len: Optional[int]=None
+    temperature: Optional[float]=None
+    top_p: Optional[float]=None
+
+    def __init__(self,
+                 max_gen_len: Optional[int]=None,
+                 temperature: Optional[float]=None,
+                 top_p: Optional[float]=None) -> None:
+        locals_ = locals()
+        for key, value in locals_.items():
+            if key != 'self' and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {k: v for k, v in cls.__dict__.items()
+                if not k.startswith('__')
+                and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
+                and v is not None}
+
 
 def init_bedrock_client(
     region_name = None,
@@ -337,6 +367,16 @@ def completion(
             "prompt": prompt,
             **inference_params
         })
+    elif provider == "meta":
+        ## LOAD CONFIG
+        config = litellm.AmazonLlamaConfig.get_config()
+        for k, v in config.items():
+            if k not in inference_params: # completion(top_p=0.9) > amazon_llama_config(top_p=0.9) <- allows for dynamic variables to be passed in
+                inference_params[k] = v
+        data = json.dumps({
+            "prompt": prompt,
+            **inference_params
+        })
     elif provider == "amazon": # amazon titan
         ## LOAD CONFIG
         config = litellm.AmazonTitanConfig.get_config()
@@ -398,6 +438,8 @@ def completion(
         model_response["finish_reason"] = response_body["stop_reason"]
     elif provider == "cohere":
         outputText = response_body["generations"][0]["text"]
+    elif provider == "meta":
+        outputText = response_body["generation"]
     else: # amazon titan
         outputText = response_body.get('results')[0].get('outputText')
 
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index e23a286d6..2c49dc02d 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -522,6 +522,13 @@
         "litellm_provider": "bedrock",
         "mode": "chat"
     },
+    "meta.llama2-13b-chat-v1": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000075,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "together-ai-up-to-3b": {
         "input_cost_per_token": 0.0000001,
         "output_cost_per_token": 0.0000001
diff --git a/litellm/utils.py b/litellm/utils.py
index 0a272e1eb..1475dcb58 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1875,6 +1875,18 @@ def get_optional_params(  # use the openai defaults
                 optional_params["topP"] = top_p
             if stream:
                 optional_params["stream"] = stream
+        elif "meta" in model: # amazon / meta llms
+            supported_params = ["max_tokens", "temperature", "top_p", "stream"]
+            _check_valid_arg(supported_params=supported_params)
+            # see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
+            if max_tokens:
+                optional_params["max_gen_len"] = max_tokens
+            if temperature:
+                optional_params["temperature"] = temperature
+            if top_p:
+                optional_params["top_p"] = top_p
+            if stream:
+                optional_params["stream"] = stream
         elif "cohere" in model: # cohere models on bedrock
             supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"]
             _check_valid_arg(supported_params=supported_params)
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 776577747..d35a9d393 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -593,6 +593,13 @@
         "litellm_provider": "bedrock",
         "mode": "chat"
     },
+    "meta.llama2-13b-chat-v1": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000075,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "together-ai-up-to-3b": {
         "input_cost_per_token": 0.0000001,
         "output_cost_per_token": 0.0000001
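
For reviewers, a minimal usage sketch of the Bedrock Llama 2 support this diff adds. The prompt and sampling values below are illustrative, and the example assumes AWS credentials and region are already configured for boto3; per the `get_optional_params` change, `max_tokens` is mapped to Bedrock's `max_gen_len`.

```python
import litellm

# Call the newly supported Bedrock Llama 2 13B chat model.
# Assumes AWS credentials/region are configured in the environment.
response = litellm.completion(
    model="bedrock/meta.llama2-13b-chat-v1",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=256,     # translated to Bedrock's `max_gen_len`
    temperature=0.7,
    top_p=0.9,
)
print(response["choices"][0]["message"]["content"])
```

Defaults can also be set once via the new config class, e.g. `litellm.AmazonLlamaConfig(max_gen_len=256)`; arguments passed directly to `completion()` still take precedence, as the `provider == "meta"` branch above shows.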