Merge pull request #811 from dchristian3188/bedrock-llama

Bedrock llama
This commit is contained in:
Ishaan Jaff 2023-11-16 07:57:50 -08:00 committed by GitHub
commit d6ad62d793
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 72 additions and 0 deletions

View file

@ -145,6 +145,7 @@ Here's an example of using a bedrock model with LiteLLM
| Cohere Command | `completion(model='cohere.command-text-v14', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` | | Cohere Command | `completion(model='cohere.command-text-v14', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| AI21 J2-Mid | `completion(model='ai21.j2-mid-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` | | AI21 J2-Mid | `completion(model='ai21.j2-mid-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| AI21 J2-Ultra | `completion(model='ai21.j2-ultra-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` | | AI21 J2-Ultra | `completion(model='ai21.j2-ultra-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| Meta Llama 2 Chat 13b | `completion(model='meta.llama2-13b-chat-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
## Bedrock Embedding ## Bedrock Embedding

View file

@ -172,6 +172,36 @@ class AnthropicConstants(Enum):
HUMAN_PROMPT = "\n\nHuman: " HUMAN_PROMPT = "\n\nHuman: "
AI_PROMPT = "\n\nAssistant: " AI_PROMPT = "\n\nAssistant: "
class AmazonLlamaConfig():
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1

    Supported Params for the Amazon / Meta Llama models:

    - `max_gen_len` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model

    Values passed to the constructor are stored on the *class* itself, so
    they act as process-wide defaults that `get_config()` returns later.

    `maxTokenCount` and `topP` are kept as legacy aliases for `max_gen_len`
    and `top_p`; they are stored under the canonical names listed above so
    that `get_config()` only ever returns the keys documented here.
    """
    max_gen_len: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None

    def __init__(self,
                 maxTokenCount: Optional[int] = None,
                 temperature: Optional[float] = None,
                 topP: Optional[float] = None,
                 max_gen_len: Optional[int] = None,
                 top_p: Optional[float] = None) -> None:
        """
        Record default inference parameters on the class.

        Args:
            maxTokenCount: legacy alias for `max_gen_len`.
            temperature: sampling temperature.
            topP: legacy alias for `top_p`.
            max_gen_len: maximum number of tokens to generate; wins over
                `maxTokenCount` when both are given.
            top_p: nucleus-sampling probability; wins over `topP` when both
                are given.
        """
        # Resolve the legacy spellings onto the canonical key names.
        resolved = {
            "max_gen_len": max_gen_len if max_gen_len is not None else maxTokenCount,
            "temperature": temperature,
            "top_p": top_p if top_p is not None else topP,
        }
        for key, value in resolved.items():
            # None means "not provided": leave any previously set default alone.
            if value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return the class-level parameters that are currently set (non-None) as a dict."""
        return {k: v for k, v in cls.__dict__.items()
                if not k.startswith('__')
                and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
                and v is not None}
def init_bedrock_client( def init_bedrock_client(
region_name = None, region_name = None,
@ -337,6 +367,16 @@ def completion(
"prompt": prompt, "prompt": prompt,
**inference_params **inference_params
}) })
elif provider == "meta":
## LOAD CONFIG
config = litellm.AmazonLlamaConfig.get_config()
for k, v in config.items():
if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
inference_params[k] = v
data = json.dumps({
"prompt": prompt,
**inference_params
})
elif provider == "amazon": # amazon titan elif provider == "amazon": # amazon titan
## LOAD CONFIG ## LOAD CONFIG
config = litellm.AmazonTitanConfig.get_config() config = litellm.AmazonTitanConfig.get_config()
@ -398,6 +438,8 @@ def completion(
model_response["finish_reason"] = response_body["stop_reason"] model_response["finish_reason"] = response_body["stop_reason"]
elif provider == "cohere": elif provider == "cohere":
outputText = response_body["generations"][0]["text"] outputText = response_body["generations"][0]["text"]
elif provider == "meta":
outputText = response_body["generation"]
else: # amazon titan else: # amazon titan
outputText = response_body.get('results')[0].get('outputText') outputText = response_body.get('results')[0].get('outputText')

View file

@ -522,6 +522,13 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"meta.llama2-13b-chat-v1": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000075,
"output_cost_per_token": 0.000001,
"litellm_provider": "bedrock",
"mode": "chat"
},
"together-ai-up-to-3b": { "together-ai-up-to-3b": {
"input_cost_per_token": 0.0000001, "input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000001 "output_cost_per_token": 0.0000001

View file

@ -2036,6 +2036,18 @@ def get_optional_params( # use the openai defaults
optional_params["topP"] = top_p optional_params["topP"] = top_p
if stream: if stream:
optional_params["stream"] = stream optional_params["stream"] = stream
elif "meta" in model: # amazon / meta llms
supported_params = ["max_tokens", "temperature", "top_p", "stream"]
_check_valid_arg(supported_params=supported_params)
# see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
if max_tokens:
optional_params["max_gen_len"] = max_tokens
if temperature:
optional_params["temperature"] = temperature
if top_p:
optional_params["top_p"] = top_p
if stream:
optional_params["stream"] = stream
elif "cohere" in model: # cohere models on bedrock elif "cohere" in model: # cohere models on bedrock
supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"] supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"]
_check_valid_arg(supported_params=supported_params) _check_valid_arg(supported_params=supported_params)
@ -4583,6 +4595,9 @@ class CustomStreamWrapper:
is_finished = True is_finished = True
finish_reason = stop_reason finish_reason = stop_reason
######## bedrock.cohere mappings ############### ######## bedrock.cohere mappings ###############
# meta mapping
elif "generation" in chunk_data:
text = chunk_data['generation'] # bedrock.meta
# cohere mapping # cohere mapping
elif "text" in chunk_data: elif "text" in chunk_data:
text = chunk_data["text"] # bedrock.cohere text = chunk_data["text"] # bedrock.cohere

View file

@ -595,6 +595,13 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"meta.llama2-13b-chat-v1": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000075,
"output_cost_per_token": 0.000001,
"litellm_provider": "bedrock",
"mode": "chat"
},
"together-ai-up-to-3b": { "together-ai-up-to-3b": {
"input_cost_per_token": 0.0000001, "input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000001 "output_cost_per_token": 0.0000001