mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00
added support for bedrock llama models
This commit is contained in:
parent
d4de55b053
commit
fe7e3ff038
4 changed files with 68 additions and 0 deletions
|
@ -172,6 +172,36 @@ class AnthropicConstants(Enum):
|
||||||
HUMAN_PROMPT = "\n\nHuman: "
|
HUMAN_PROMPT = "\n\nHuman: "
|
||||||
AI_PROMPT = "\n\nAssistant: "
|
AI_PROMPT = "\n\nAssistant: "
|
||||||
|
|
||||||
|
class AmazonLlamaConfig():
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1

    Supported Params for the Amazon / Meta Llama models:

    - `max_gen_len` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model

    Values are stored on the class itself (not on the instance), so
    constructing ``AmazonLlamaConfig(...)`` sets defaults that
    ``get_config()`` returns afterwards.
    """
    max_gen_len: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None

    def __init__(self,
                 maxTokenCount: Optional[int] = None,
                 temperature: Optional[float] = None,
                 topP: Optional[float] = None,
                 max_gen_len: Optional[int] = None,
                 top_p: Optional[float] = None) -> None:
        """
        Store every non-None argument as a class-level default.

        `maxTokenCount` and `topP` are legacy (Titan-style) spellings kept so
        existing positional/keyword callers keep working; they are stored
        under the `max_gen_len` / `top_p` keys listed in the class docstring.
        When both spellings are given, `max_gen_len` / `top_p` win.
        """
        resolved = {
            "max_gen_len": max_gen_len if max_gen_len is not None else maxTokenCount,
            "temperature": temperature,
            "top_p": top_p if top_p is not None else topP,
        }
        for key, value in resolved.items():
            if value is not None:
                # Set on the class so the value is visible to get_config().
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """
        Return the class-level settings that are currently set.

        Dunder entries, functions/classmethods/staticmethods and attributes
        whose value is None are left out.
        """
        return {k: v for k, v in cls.__dict__.items()
                if not k.startswith('__')
                and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
                and v is not None}
|
||||||
|
|
||||||
|
|
||||||
def init_bedrock_client(
|
def init_bedrock_client(
|
||||||
region_name = None,
|
region_name = None,
|
||||||
|
@ -337,6 +367,16 @@ def completion(
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
**inference_params
|
**inference_params
|
||||||
})
|
})
|
||||||
|
elif provider == "meta":
|
||||||
|
## LOAD CONFIG
|
||||||
|
config = litellm.AmazonLlamaConfig.get_config()
|
||||||
|
for k, v in config.items():
|
||||||
|
if k not in inference_params: # completion(temperature=0.2) > AmazonLlamaConfig(temperature=0.2) <- allows for dynamic variables to be passed in
|
||||||
|
inference_params[k] = v
|
||||||
|
data = json.dumps({
|
||||||
|
"prompt": prompt,
|
||||||
|
**inference_params
|
||||||
|
})
|
||||||
elif provider == "amazon": # amazon titan
|
elif provider == "amazon": # amazon titan
|
||||||
## LOAD CONFIG
|
## LOAD CONFIG
|
||||||
config = litellm.AmazonTitanConfig.get_config()
|
config = litellm.AmazonTitanConfig.get_config()
|
||||||
|
@ -398,6 +438,8 @@ def completion(
|
||||||
model_response["finish_reason"] = response_body["stop_reason"]
|
model_response["finish_reason"] = response_body["stop_reason"]
|
||||||
elif provider == "cohere":
|
elif provider == "cohere":
|
||||||
outputText = response_body["generations"][0]["text"]
|
outputText = response_body["generations"][0]["text"]
|
||||||
|
elif provider == "meta":
|
||||||
|
outputText = response_body["generation"]
|
||||||
else: # amazon titan
|
else: # amazon titan
|
||||||
outputText = response_body.get('results')[0].get('outputText')
|
outputText = response_body.get('results')[0].get('outputText')
|
||||||
|
|
||||||
|
|
|
@ -522,6 +522,13 @@
|
||||||
"litellm_provider": "bedrock",
|
"litellm_provider": "bedrock",
|
||||||
"mode": "chat"
|
"mode": "chat"
|
||||||
},
|
},
|
||||||
|
"meta.llama2-13b-chat-v1": {
|
||||||
|
"max_tokens": 4096,
|
||||||
|
"input_cost_per_token": 0.00000075,
|
||||||
|
"output_cost_per_token": 0.000001,
|
||||||
|
"litellm_provider": "bedrock",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
"together-ai-up-to-3b": {
|
"together-ai-up-to-3b": {
|
||||||
"input_cost_per_token": 0.0000001,
|
"input_cost_per_token": 0.0000001,
|
||||||
"output_cost_per_token": 0.0000001
|
"output_cost_per_token": 0.0000001
|
||||||
|
|
|
@ -1875,6 +1875,18 @@ def get_optional_params( # use the openai defaults
|
||||||
optional_params["topP"] = top_p
|
optional_params["topP"] = top_p
|
||||||
if stream:
|
if stream:
|
||||||
optional_params["stream"] = stream
|
optional_params["stream"] = stream
|
||||||
|
elif "meta" in model: # amazon / meta llms
|
||||||
|
supported_params = ["max_tokens", "temperature", "top_p", "stream"]
|
||||||
|
_check_valid_arg(supported_params=supported_params)
|
||||||
|
# see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
|
||||||
|
if max_tokens:
|
||||||
|
optional_params["max_gen_len"] = max_tokens
|
||||||
|
if temperature:
|
||||||
|
optional_params["temperature"] = temperature
|
||||||
|
if top_p:
|
||||||
|
optional_params["top_p"] = top_p
|
||||||
|
if stream:
|
||||||
|
optional_params["stream"] = stream
|
||||||
elif "cohere" in model: # cohere models on bedrock
|
elif "cohere" in model: # cohere models on bedrock
|
||||||
supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"]
|
supported_params = ["stream", "temperature", "max_tokens", "logit_bias", "top_p", "frequency_penalty", "presence_penalty", "stop"]
|
||||||
_check_valid_arg(supported_params=supported_params)
|
_check_valid_arg(supported_params=supported_params)
|
||||||
|
|
|
@ -593,6 +593,13 @@
|
||||||
"litellm_provider": "bedrock",
|
"litellm_provider": "bedrock",
|
||||||
"mode": "chat"
|
"mode": "chat"
|
||||||
},
|
},
|
||||||
|
"meta.llama2-13b-chat-v1": {
|
||||||
|
"max_tokens": 4096,
|
||||||
|
"input_cost_per_token": 0.00000075,
|
||||||
|
"output_cost_per_token": 0.000001,
|
||||||
|
"litellm_provider": "bedrock",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
"together-ai-up-to-3b": {
|
"together-ai-up-to-3b": {
|
||||||
"input_cost_per_token": 0.0000001,
|
"input_cost_per_token": 0.0000001,
|
||||||
"output_cost_per_token": 0.0000001
|
"output_cost_per_token": 0.0000001
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue