added support for bedrock llama models

2025-04-24 18:24:20 +00:00 · 2023-11-13 15:41:21 -08:00 · 2023-11-13 15:41:21 -08:00 · fe7e3ff038
commit fe7e3ff038
parent d4de55b053
4 changed files with 68 additions and 0 deletions
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@ -172,6 +172,36 @@ class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman: "
    AI_PROMPT = "\n\nAssistant: "
 class AmazonLlamaConfig():
    """
    Default inference parameters for the Amazon Bedrock / Meta Llama models.

    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
    Supported Params for the Amazon / Meta Llama models:
    - `max_gen_len` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model

    Values are stored on the class itself (not on the instance), so
    constructing a config once changes what `get_config()` returns for
    every later caller.

    `maxTokenCount` and `topP` are kept as legacy keyword aliases for
    `max_gen_len` and `top_p`; they are stored under the Llama parameter
    names so `get_config()` only ever produces keys listed above.
    """
    max_gen_len: Optional[int]=None
    temperature: Optional[float]=None
    top_p: Optional[float]=None

    def __init__(self,
                 maxTokenCount: Optional[int]=None,
                 temperature: Optional[float]=None,
                 topP: Optional[float]=None,
                 max_gen_len: Optional[int]=None,
                 top_p: Optional[float]=None) -> None:
        """
        Record every non-None argument as a class-level default.

        Args:
            maxTokenCount: legacy alias for `max_gen_len`.
            temperature: sampling temperature.
            topP: legacy alias for `top_p`.
            max_gen_len: maximum number of tokens to generate.
            top_p: nucleus-sampling probability mass.

        When both a parameter and its legacy alias are given, the
        Llama-named parameter wins.
        """
        resolved = {
            "max_gen_len": max_gen_len if max_gen_len is not None else maxTokenCount,
            "temperature": temperature,
            "top_p": top_p if top_p is not None else topP,
        }
        for key, value in resolved.items():
            # `is not None` (not truthiness) so that 0 / 0.0 are kept.
            if value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """
        Return the class-level parameters that currently have a value.

        Only attributes defined directly on this class are considered;
        dunder names, functions/methods and None values are skipped.
        """
        return {k: v for k, v in cls.__dict__.items()
                if not k.startswith('__')
                and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
                and v is not None}
 def init_bedrock_client(
        region_name = None,
@ -337,6 +367,16 @@ def completion(
                "prompt": prompt,
                **inference_params
            })
        elif provider == "meta":
            ## LOAD CONFIG
            config = litellm.AmazonLlamaConfig.get_config()
            for k, v in config.items(): 
                if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                    inference_params[k] = v
            data = json.dumps({
                "prompt": prompt,
                **inference_params
            })
        elif provider == "amazon":  # amazon titan
            ## LOAD CONFIG
            config = litellm.AmazonTitanConfig.get_config() 
@ -398,6 +438,8 @@ def completion(
            model_response["finish_reason"] = response_body["stop_reason"]
        elif provider == "cohere": 
            outputText = response_body["generations"][0]["text"]
        elif provider == "meta": 
            outputText = response_body["generation"]
        else:  # amazon titan
            outputText = response_body.get('results')[0].get('outputText')
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -522,6 +522,13 @@
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
    "meta.llama2-13b-chat-v1": {
        "max_tokens": 4096, 
        "input_cost_per_token": 0.00000075,
        "output_cost_per_token": 0.000001,
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
    "together-ai-up-to-3b": {
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000001
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -1875,6 +1875,18 @@ def get_optional_params(  # use the openai defaults
                optional_params["topP"] = top_p
            if stream: 
                optional_params["stream"] = stream
        elif "meta" in model: # amazon / meta llms
            supported_params = ["max_tokens", "temperature", "top_p", "stream"]
            _check_valid_arg(supported_params=supported_params)
            # see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
            if max_tokens:
                optional_params["max_gen_len"] = max_tokens
            if temperature:
                optional_params["temperature"] = temperature
            if top_p:
                optional_params["top_p"] = top_p
            if stream: 
                optional_params["stream"] = stream
        elif "cohere" in model: # cohere models on bedrock
            supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"]
            _check_valid_arg(supported_params=supported_params)
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -593,6 +593,13 @@
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
    "meta.llama2-13b-chat-v1": {
        "max_tokens": 4096, 
        "input_cost_per_token": 0.00000075,
        "output_cost_per_token": 0.000001,
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
    "together-ai-up-to-3b": {
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000001