Merge pull request #811 from dchristian3188/bedrock-llama

Bedrock llama
2025-04-24 18:24:20 +00:00 · 2023-11-16 07:57:50 -08:00 · 2023-11-16 07:57:50 -08:00 · d6ad62d793
commit d6ad62d793
parent 3f95fd2da5 45ae4a76e4
5 changed files with 72 additions and 0 deletions
--- a/docs/my-website/docs/providers/bedrock.md
+++ b/docs/my-website/docs/providers/bedrock.md
@ -145,6 +145,7 @@ Here's an example of using a bedrock model with LiteLLM
 | Cohere Command              | `completion(model='cohere.command-text-v14', messages=messages)`   | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
 | AI21 J2-Mid             | `completion(model='ai21.j2-mid-v1', messages=messages)`   | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
 | AI21 J2-Ultra              | `completion(model='ai21.j2-ultra-v1', messages=messages)`   | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
 | Meta Llama 2 Chat 13b              | `completion(model='meta.llama2-13b-chat-v1', messages=messages)`   | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
 ## Bedrock Embedding
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@ -172,6 +172,36 @@ class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman: "
    AI_PROMPT = "\n\nAssistant: "
 class AmazonLlamaConfig():
     """
     Default inference parameters for the Meta Llama models on Amazon Bedrock.

     Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1

     Supported Params for the Amazon / Meta Llama models:

     - `max_gen_len` (integer) max tokens,
     - `temperature` (float) temperature for model,
     - `top_p` (float) top p for model

     Instantiating the class stores every non-None argument on the class
     itself (not on the instance), so the values act as process-wide defaults
     that `get_config()` reports afterwards.

     `maxTokenCount` and `topP` are legacy keyword names kept so existing
     callers keep working; their values are stored under the documented
     `max_gen_len` / `top_p` names. When both a legacy name and its
     documented counterpart are passed, the documented one wins.
     """
     max_gen_len: Optional[int] = None
     temperature: Optional[float] = None
     top_p: Optional[float] = None

     def __init__(self,
                  maxTokenCount: Optional[int] = None,
                  temperature: Optional[float] = None,
                  topP: Optional[float] = None,
                  max_gen_len: Optional[int] = None,
                  top_p: Optional[float] = None) -> None:
         """
         Record the given defaults on the class.

         Args:
             maxTokenCount: legacy alias for `max_gen_len`.
             temperature: sampling temperature.
             topP: legacy alias for `top_p`.
             max_gen_len: maximum number of tokens to generate.
             top_p: nucleus-sampling probability mass.
         """
         # Resolve the legacy aliases onto the documented parameter names so
         # that get_config() only ever exposes `max_gen_len` / `top_p`.
         resolved = {
             "max_gen_len": max_gen_len if max_gen_len is not None else maxTokenCount,
             "temperature": temperature,
             "top_p": top_p if top_p is not None else topP,
         }
         for key, value in resolved.items():
             # `is not None` (rather than truthiness) keeps explicit zeros,
             # e.g. temperature=0.
             if value is not None:
                 setattr(self.__class__, key, value)

     @classmethod
     def get_config(cls):
         """
         Return the class-level defaults that have been set, as a dict.

         Dunder attributes, functions / classmethods / staticmethods and
         attributes whose value is None are left out.
         """
         return {k: v for k, v in cls.__dict__.items()
                 if not k.startswith('__')
                 and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
                 and v is not None}
 def init_bedrock_client(
        region_name = None,
@ -337,6 +367,16 @@ def completion(
                "prompt": prompt,
                **inference_params
            })
        elif provider == "meta":
            ## LOAD CONFIG
            config = litellm.AmazonLlamaConfig.get_config()
            for k, v in config.items(): 
                if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                    inference_params[k] = v
            data = json.dumps({
                "prompt": prompt,
                **inference_params
            })
        elif provider == "amazon":  # amazon titan
            ## LOAD CONFIG
            config = litellm.AmazonTitanConfig.get_config() 
@ -398,6 +438,8 @@ def completion(
            model_response["finish_reason"] = response_body["stop_reason"]
        elif provider == "cohere": 
            outputText = response_body["generations"][0]["text"]
        elif provider == "meta": 
            outputText = response_body["generation"]
        else:  # amazon titan
            outputText = response_body.get('results')[0].get('outputText')
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -522,6 +522,13 @@
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
    "meta.llama2-13b-chat-v1": {
        "max_tokens": 4096, 
        "input_cost_per_token": 0.00000075,
        "output_cost_per_token": 0.000001,
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
    "together-ai-up-to-3b": {
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000001
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -2036,6 +2036,18 @@ def get_optional_params(  # use the openai defaults
                optional_params["topP"] = top_p
            if stream: 
                optional_params["stream"] = stream
        elif "meta" in model: # amazon / meta llms
            supported_params = ["max_tokens", "temperature", "top_p", "stream"]
            _check_valid_arg(supported_params=supported_params)
            # see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
            if max_tokens:
                optional_params["max_gen_len"] = max_tokens
            if temperature:
                optional_params["temperature"] = temperature
            if top_p:
                optional_params["top_p"] = top_p
            if stream: 
                optional_params["stream"] = stream
        elif "cohere" in model: # cohere models on bedrock
            supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"]
            _check_valid_arg(supported_params=supported_params)
@ -4583,6 +4595,9 @@ class CustomStreamWrapper:
                    is_finished = True
                    finish_reason = stop_reason
            ######## bedrock.cohere mappings ###############
            # meta mapping
            elif "generation" in chunk_data:
                text = chunk_data['generation'] # bedrock.meta
            # cohere mapping
            elif "text" in chunk_data:
                text = chunk_data["text"] # bedrock.cohere
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -595,6 +595,13 @@
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
    "meta.llama2-13b-chat-v1": {
        "max_tokens": 4096, 
        "input_cost_per_token": 0.00000075,
        "output_cost_per_token": 0.000001,
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
    "together-ai-up-to-3b": {
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000001