mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00
Merge pull request #811 from dchristian3188/bedrock-llama
Bedrock llama
This commit is contained in:
commit
d6ad62d793
5 changed files with 72 additions and 0 deletions
|
@ -145,6 +145,7 @@ Here's an example of using a bedrock model with LiteLLM
|
|||
| Cohere Command | `completion(model='cohere.command-text-v14', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
||||
| AI21 J2-Mid | `completion(model='ai21.j2-mid-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
||||
| AI21 J2-Ultra | `completion(model='ai21.j2-ultra-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
||||
| Meta Llama 2 Chat 13b | `completion(model='meta.llama2-13b-chat-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
|
||||
|
||||
## Bedrock Embedding
|
||||
|
||||
|
|
|
@ -172,6 +172,36 @@ class AnthropicConstants(Enum):
|
|||
HUMAN_PROMPT = "\n\nHuman: "
|
||||
AI_PROMPT = "\n\nAssistant: "
|
||||
|
||||
class AmazonLlamaConfig():
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1

    Supported Params for the Amazon / Meta Llama models:

    - `max_gen_len` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model

    Values are stored on the class itself (not on the instance), so constructing
    ``AmazonLlamaConfig(...)`` updates the defaults that ``get_config()`` returns
    for every later caller.
    """
    # Attribute names double as the keys emitted by get_config(), so they must
    # match the parameter names listed in the docstring above.
    max_gen_len: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None

    def __init__(self,
                 maxTokenCount: Optional[int] = None,
                 temperature: Optional[float] = None,
                 topP: Optional[float] = None,
                 max_gen_len: Optional[int] = None,
                 top_p: Optional[float] = None) -> None:
        """
        Record every non-None argument as a class-level default.

        Args:
            maxTokenCount: Legacy alias for ``max_gen_len`` (kept so existing
                callers keep working); ignored when ``max_gen_len`` is given.
            temperature: Sampling temperature.
            topP: Legacy alias for ``top_p``; ignored when ``top_p`` is given.
            max_gen_len: Maximum number of tokens to generate.
            top_p: Nucleus-sampling probability mass.
        """
        # Translate the legacy (Titan-style) keyword names into the names this
        # config documents; storing them verbatim would put `maxTokenCount` /
        # `topP` keys into the config instead of `max_gen_len` / `top_p`.
        if max_gen_len is None:
            max_gen_len = maxTokenCount
        if top_p is None:
            top_p = topP

        resolved = {
            "max_gen_len": max_gen_len,
            "temperature": temperature,
            "top_p": top_p,
        }
        for key, value in resolved.items():
            if value is not None:
                # Stored on the class so get_config() (a classmethod) sees it.
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return the class-level settings that are set (non-None) as a dict."""
        return {k: v for k, v in cls.__dict__.items()
                if not k.startswith('__')
                and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
                and v is not None}
|
||||
|
||||
|
||||
def init_bedrock_client(
|
||||
region_name = None,
|
||||
|
@ -337,6 +367,16 @@ def completion(
|
|||
"prompt": prompt,
|
||||
**inference_params
|
||||
})
|
||||
elif provider == "meta":
|
||||
## LOAD CONFIG
|
||||
config = litellm.AmazonLlamaConfig.get_config()
|
||||
for k, v in config.items():
|
||||
if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||
inference_params[k] = v
|
||||
data = json.dumps({
|
||||
"prompt": prompt,
|
||||
**inference_params
|
||||
})
|
||||
elif provider == "amazon": # amazon titan
|
||||
## LOAD CONFIG
|
||||
config = litellm.AmazonTitanConfig.get_config()
|
||||
|
@ -398,6 +438,8 @@ def completion(
|
|||
model_response["finish_reason"] = response_body["stop_reason"]
|
||||
elif provider == "cohere":
|
||||
outputText = response_body["generations"][0]["text"]
|
||||
elif provider == "meta":
|
||||
outputText = response_body["generation"]
|
||||
else: # amazon titan
|
||||
outputText = response_body.get('results')[0].get('outputText')
|
||||
|
||||
|
|
|
@ -522,6 +522,13 @@
|
|||
"litellm_provider": "bedrock",
|
||||
"mode": "chat"
|
||||
},
|
||||
"meta.llama2-13b-chat-v1": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000075,
|
||||
"output_cost_per_token": 0.000001,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat"
|
||||
},
|
||||
"together-ai-up-to-3b": {
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000001
|
||||
|
|
|
@ -2036,6 +2036,18 @@ def get_optional_params( # use the openai defaults
|
|||
optional_params["topP"] = top_p
|
||||
if stream:
|
||||
optional_params["stream"] = stream
|
||||
elif "meta" in model: # amazon / meta llms
|
||||
supported_params = ["max_tokens", "temperature", "top_p", "stream"]
|
||||
_check_valid_arg(supported_params=supported_params)
|
||||
# see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
|
||||
if max_tokens:
|
||||
optional_params["max_gen_len"] = max_tokens
|
||||
if temperature:
|
||||
optional_params["temperature"] = temperature
|
||||
if top_p:
|
||||
optional_params["top_p"] = top_p
|
||||
if stream:
|
||||
optional_params["stream"] = stream
|
||||
elif "cohere" in model: # cohere models on bedrock
|
||||
supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"]
|
||||
_check_valid_arg(supported_params=supported_params)
|
||||
|
@ -4583,6 +4595,9 @@ class CustomStreamWrapper:
|
|||
is_finished = True
|
||||
finish_reason = stop_reason
|
||||
######## bedrock.cohere mappings ###############
|
||||
# meta mapping
|
||||
elif "generation" in chunk_data:
|
||||
text = chunk_data['generation'] # bedrock.meta
|
||||
# cohere mapping
|
||||
elif "text" in chunk_data:
|
||||
text = chunk_data["text"] # bedrock.cohere
|
||||
|
|
|
@ -595,6 +595,13 @@
|
|||
"litellm_provider": "bedrock",
|
||||
"mode": "chat"
|
||||
},
|
||||
"meta.llama2-13b-chat-v1": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000075,
|
||||
"output_cost_per_token": 0.000001,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat"
|
||||
},
|
||||
"together-ai-up-to-3b": {
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000001
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue