Merge pull request #811 from dchristian3188/bedrock-llama

Bedrock llama
2025-04-24 18:24:20 +00:00 · 2023-11-16 07:57:50 -08:00 · 2023-11-16 07:57:50 -08:00 · d6ad62d793
commit d6ad62d793
parent 3f95fd2da5 45ae4a76e4
5 changed files with 72 additions and 0 deletions
--- a/docs/my-website/docs/providers/bedrock.md
+++ b/docs/my-website/docs/providers/bedrock.md
@ -145,6 +145,7 @@ Here's an example of using a bedrock model with LiteLLM
 | Cohere Command              | `completion(model='cohere.command-text-v14', messages=messages)`   | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
 | AI21 J2-Mid             | `completion(model='ai21.j2-mid-v1', messages=messages)`   | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
 | AI21 J2-Ultra              | `completion(model='ai21.j2-ultra-v1', messages=messages)`   | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
+| Meta Llama 2 Chat 13b              | `completion(model='meta.llama2-13b-chat-v1', messages=messages)`   | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |

 ## Bedrock Embedding

--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@ -172,6 +172,36 @@ class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman: "
    AI_PROMPT = "\n\nAssistant: "

+class AmazonLlamaConfig():
+    """
+    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
+
+    Supported Params for the Amazon / Meta Llama models:
+
+    - `max_gen_len` (integer) max tokens,
+    - `temperature` (float) temperature for model,
+    - `top_p` (float) top p for model
+
+    Constructor arguments are written onto the class itself (not the
+    instance), so they become defaults for every later `get_config()` call.
+    """
+    max_gen_len: Optional[int]=None
+    temperature: Optional[float]=None
+    top_p: Optional[float]=None
+
+    def __init__(self,
+                 maxTokenCount: Optional[int]=None,
+                 temperature: Optional[float]=None,
+                 topP: Optional[float]=None,
+                 max_gen_len: Optional[int]=None,
+                 top_p: Optional[float]=None) -> None:
+        """
+        Record non-None arguments as class-level defaults.
+
+        `maxTokenCount` and `topP` are legacy aliases kept for backward
+        compatibility; they are stored under the keys listed in the class
+        docstring (`max_gen_len`, `top_p`). When both an alias and its
+        canonical name are given, the canonical name wins.
+        """
+        overrides = {
+            "max_gen_len": max_gen_len if max_gen_len is not None else maxTokenCount,
+            "temperature": temperature,
+            "top_p": top_p if top_p is not None else topP,
+        }
+        for key, value in overrides.items():
+            if value is not None:
+                # Stored on the class so that the classmethod get_config() sees it.
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        """Return the class-level params that are set (non-None) as a dict."""
+        return {k: v for k, v in cls.__dict__.items()
+                if not k.startswith('__')
+                and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
+                and v is not None}
+

 def init_bedrock_client(
        region_name = None,
@ -337,6 +367,16 @@ def completion(
                "prompt": prompt,
                **inference_params
            })
+        elif provider == "meta":
+            ## LOAD CONFIG
+            config = litellm.AmazonLlamaConfig.get_config()
+            for k, v in config.items(): 
+                if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
+                    inference_params[k] = v
+            data = json.dumps({
+                "prompt": prompt,
+                **inference_params
+            })
        elif provider == "amazon":  # amazon titan
            ## LOAD CONFIG
            config = litellm.AmazonTitanConfig.get_config() 
@ -398,6 +438,8 @@ def completion(
            model_response["finish_reason"] = response_body["stop_reason"]
        elif provider == "cohere": 
            outputText = response_body["generations"][0]["text"]
+        elif provider == "meta": 
+            outputText = response_body["generation"]
        else:  # amazon titan
            outputText = response_body.get('results')[0].get('outputText')

--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -522,6 +522,13 @@
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
+    "meta.llama2-13b-chat-v1": {
+        "max_tokens": 4096, 
+        "input_cost_per_token": 0.00000075,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
    "together-ai-up-to-3b": {
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000001
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -2036,6 +2036,18 @@ def get_optional_params(  # use the openai defaults
                optional_params["topP"] = top_p
            if stream: 
                optional_params["stream"] = stream
+        elif "meta" in model: # amazon / meta llms
+            supported_params = ["max_tokens", "temperature", "top_p", "stream"]
+            _check_valid_arg(supported_params=supported_params)
+            # see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
+            if max_tokens:
+                optional_params["max_gen_len"] = max_tokens
+            if temperature:
+                optional_params["temperature"] = temperature
+            if top_p:
+                optional_params["top_p"] = top_p
+            if stream: 
+                optional_params["stream"] = stream
        elif "cohere" in model: # cohere models on bedrock
            supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"]
            _check_valid_arg(supported_params=supported_params)
@ -4583,6 +4595,9 @@ class CustomStreamWrapper:
                    is_finished = True
                    finish_reason = stop_reason
            ######## bedrock.cohere mappings ###############
+            # meta mapping
+            elif "generation" in chunk_data:
+                text = chunk_data['generation'] # bedrock.meta
            # cohere mapping
            elif "text" in chunk_data:
                text = chunk_data["text"] # bedrock.cohere
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -595,6 +595,13 @@
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
+    "meta.llama2-13b-chat-v1": {
+        "max_tokens": 4096, 
+        "input_cost_per_token": 0.00000075,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
    "together-ai-up-to-3b": {
        "input_cost_per_token": 0.0000001,
        "output_cost_per_token": 0.0000001