Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 03:34:10 +00:00
Litellm dev 02 13 2025 p2 (#8525)
* fix(azure/chat/gpt_transformation.py): add 'prediction' as a supported azure param. Closes https://github.com/BerriAI/litellm/issues/8500
* build(model_prices_and_context_window.json): add new 'gemini-2.0-pro-exp-02-05' model
* style: clean up invalid json trailing comma
* feat(utils.py): support passing 'tokenizer_config' to register_prompt_template; enables passing the complete tokenizer config of a model to litellm, so deepseek can be called on bedrock with the correct prompt template
* fix(utils.py): fix register_prompt_template for custom model names
* test(test_prompt_factory.py): fix test
* test(test_completion.py): add e2e test for bedrock invoke deepseek ft model
* feat(base_invoke_transformation.py): support hf_model_name param for bedrock invoke calls; enables a proxy admin to set the base model for a ft bedrock deepseek model
* feat(bedrock/invoke): support deepseek_r1 route for bedrock; makes it easy to apply the right chat template to that call
* feat(constants.py): store deepseek r1 chat template - allows users to get a correct response from deepseek r1 without extra work
* test(test_completion.py): add e2e mock test for bedrock deepseek
* docs(bedrock.md): document the new deepseek_r1 route for bedrock, which applies the right config
* fix(exception_mapping_utils.py): catch read operation timeout
This commit is contained in:
parent be31a5f7b4
commit e33543ae4f
15 changed files with 444 additions and 39 deletions
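From the caller's side, the main user-facing change is the new deepseek_r1 bedrock route. The snippet below is a minimal illustrative sketch based on the commit message and the tests in the diff below, not code taken from the commit or the LiteLLM docs; the model ARN is the same placeholder the tests use.

import litellm

# Sketch only (assumption): routing an imported Bedrock model through the new
# `deepseek_r1` prefix so litellm applies the DeepSeek-R1 chat template.
# The ARN is a placeholder, not a real deployment.
response = litellm.completion(
    model="bedrock/deepseek_r1/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re",
    messages=[
        {"role": "system", "content": "You are a good assistant"},
        {"role": "user", "content": "What is the weather in Copenhagen?"},
    ],
)
print(response.choices[0].message.content)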
@@ -3242,6 +3242,121 @@ def test_replicate_custom_prompt_dict():
    litellm.custom_prompt_dict = {}  # reset


def test_bedrock_deepseek_custom_prompt_dict():
    model = "llama/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re"
    litellm.register_prompt_template(
        model=model,
        tokenizer_config={
            "add_bos_token": True,
            "add_eos_token": False,
            "bos_token": {
                "__type": "AddedToken",
                "content": "<|begin▁of▁sentence|>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "clean_up_tokenization_spaces": False,
            "eos_token": {
                "__type": "AddedToken",
                "content": "<|end▁of▁sentence|>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "legacy": True,
            "model_max_length": 16384,
            "pad_token": {
                "__type": "AddedToken",
                "content": "<|end▁of▁sentence|>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "sp_model_kwargs": {},
            "unk_token": None,
            "tokenizer_class": "LlamaTokenizerFast",
            "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
        },
    )
    assert model in litellm.known_tokenizer_config
    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    client = HTTPHandler()

    messages = [
        {"role": "system", "content": "You are a good assistant"},
        {"role": "user", "content": "What is the weather in Copenhagen?"},
    ]

    with patch.object(client, "post") as mock_post:
        try:
            completion(
                model="bedrock/" + model,
                messages=messages,
                client=client,
            )
        except Exception as e:
            pass

        mock_post.assert_called_once()
        print(mock_post.call_args.kwargs)
        json_data = json.loads(mock_post.call_args.kwargs["data"])
        assert (
            json_data["prompt"].rstrip()
            == """<|begin▁of▁sentence|>You are a good assistant<|User|>What is the weather in Copenhagen?<|Assistant|><think>"""
        )


def test_bedrock_deepseek_known_tokenizer_config():
    model = "deepseek_r1/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re"
    from litellm.llms.custom_httpx.http_handler import HTTPHandler
    from unittest.mock import Mock
    import httpx

    mock_response = Mock(spec=httpx.Response)
    mock_response.status_code = 200
    mock_response.headers = {
        "x-amzn-bedrock-input-token-count": "20",
        "x-amzn-bedrock-output-token-count": "30",
    }

    # The response format for deepseek_r1
    response_data = {
        "generation": "The weather in Copenhagen is currently sunny with a temperature of 20°C (68°F). The forecast shows clear skies throughout the day with a gentle breeze from the northwest.",
        "stop_reason": "stop",
        "stop_sequence": None,
    }

    mock_response.json.return_value = response_data
    mock_response.text = json.dumps(response_data)

    client = HTTPHandler()

    messages = [
        {"role": "system", "content": "You are a good assistant"},
        {"role": "user", "content": "What is the weather in Copenhagen?"},
    ]

    with patch.object(client, "post", return_value=mock_response) as mock_post:
        completion(
            model="bedrock/" + model,
            messages=messages,
            client=client,
        )

        mock_post.assert_called_once()
        print(mock_post.call_args.kwargs)
        json_data = json.loads(mock_post.call_args.kwargs["data"])
        assert (
            json_data["prompt"].rstrip()
            == """<|begin▁of▁sentence|>You are a good assistant<|User|>What is the weather in Copenhagen?<|Assistant|><think>"""
        )


# test_replicate_custom_prompt_dict()

# commenting this out since we won't always be testing a custom, replicate deployment