Litellm dev 02 13 2025 p2 (#8525)

* fix(azure/chat/gpt_transformation.py): add 'prediction' as a supported azure param

Closes https://github.com/BerriAI/litellm/issues/8500
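
A minimal sketch of the new param in use (the deployment name is a placeholder, Azure credentials are assumed to be set via environment variables, and 'prediction' follows the OpenAI predicted-outputs shape):

import litellm

response = litellm.completion(
    model="azure/my-gpt-4o-deployment",  # placeholder deployment name
    messages=[{"role": "user", "content": "Rewrite squares() to use a list comprehension."}],
    # predicted output to speed up regeneration of mostly-unchanged content (illustrative values)
    prediction={
        "type": "content",
        "content": "def squares(nums):\n    return [n * n for n in nums]",
    },
)
print(response.choices[0].message.content)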

* build(model_prices_and_context_window.json): add new 'gemini-2.0-pro-exp-02-05' model

* style: cleanup invalid json trailing comma

* feat(utils.py): support passing 'tokenizer_config' to register_prompt_template

enables passing the complete tokenizer config of a model to litellm

allows calling deepseek on bedrock with the correct prompt template
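
Abbreviated sketch of the new usage, mirroring the test added in the diff below (the ARN is a placeholder and most tokenizer fields are elided):

import litellm

model = "llama/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re"  # placeholder ARN
litellm.register_prompt_template(
    model=model,
    tokenizer_config={
        "add_bos_token": True,
        "bos_token": {"__type": "AddedToken", "content": "<begin▁of▁sentence>"},
        "eos_token": {"__type": "AddedToken", "content": "<end▁of▁sentence>"},
        # full Jinja chat template from the model's tokenizer_config.json goes here
        "chat_template": "...",
    },
)
assert model in litellm.known_tokenizer_config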

* fix(utils.py): fix register_prompt_template for custom model names

* test(test_prompt_factory.py): fix test

* test(test_completion.py): add e2e test for bedrock invoke deepseek ft model

* feat(base_invoke_transformation.py): support hf_model_name param for bedrock invoke calls

enables the proxy admin to set the base model for a fine-tuned (ft) bedrock deepseek model
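
Hedged sketch of the param (the route prefix, ARN, and base-model id are placeholders); on the proxy the same value would be set in the deployment's litellm_params:

import litellm

response = litellm.completion(
    model="bedrock/llama/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re",  # placeholder ARN
    messages=[{"role": "user", "content": "Hello"}],
    # assumed base-model id; tells litellm which prompt template to apply to the invoke call
    hf_model_name="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
)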

* feat(bedrock/invoke): support deepseek_r1 route for bedrock

makes it easy to apply the right chat template to that call
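
For example (placeholder ARN, mirroring the mock test added in the diff below):

import litellm

response = litellm.completion(
    model="bedrock/deepseek_r1/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re",
    messages=[
        {"role": "system", "content": "You are a good assistant"},
        {"role": "user", "content": "What is the weather in Copenhagen?"},
    ],
)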

* feat(constants.py): store deepseek r1 chat template - allows users to get correct responses from deepseek r1 without extra work

* test(test_completion.py): add e2e mock test for bedrock deepseek

* docs(bedrock.md): document new deepseek_r1 route for bedrock

makes it clear which route applies the right chat template config

* fix(exception_mapping_utils.py): catch read operation timeout
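
A rough sketch of the idea (not the exact litellm code): when the underlying error message indicates a read timeout, surface it as litellm.Timeout instead of a generic API error.

import litellm

def _raise_if_read_timeout(original_exception: Exception, model: str, custom_llm_provider: str):
    # hypothetical helper, illustrative only
    if "read operation timed out" in str(original_exception).lower():
        raise litellm.Timeout(
            message=f"Request timed out: {original_exception}",
            model=model,
            llm_provider=custom_llm_provider,
        )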

Author: Krish Dholakia · 2025-02-13 20:28:42 -08:00 · committed by GitHub
Parent: be31a5f7b4
Commit: e33543ae4f
15 changed files with 444 additions and 39 deletions

test_completion.py

@@ -3242,6 +3242,121 @@ def test_replicate_custom_prompt_dict():
    litellm.custom_prompt_dict = {}  # reset


def test_bedrock_deepseek_custom_prompt_dict():
    model = "llama/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re"
    litellm.register_prompt_template(
        model=model,
        tokenizer_config={
            "add_bos_token": True,
            "add_eos_token": False,
            "bos_token": {
                "__type": "AddedToken",
                "content": "<begin▁of▁sentence>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "clean_up_tokenization_spaces": False,
            "eos_token": {
                "__type": "AddedToken",
                "content": "<end▁of▁sentence>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "legacy": True,
            "model_max_length": 16384,
            "pad_token": {
                "__type": "AddedToken",
                "content": "<end▁of▁sentence>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "sp_model_kwargs": {},
            "unk_token": None,
            "tokenizer_class": "LlamaTokenizerFast",
            "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<User>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<Assistant><tool▁calls▁begin><tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<tool▁call▁end>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<tool▁call▁end>'}}{{'<tool▁calls▁end><end▁of▁sentence>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<tool▁outputs▁end>' + message['content'] + '<end▁of▁sentence>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<Assistant>' + content + '<end▁of▁sentence>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<tool▁outputs▁begin><tool▁output▁begin>' + message['content'] + '<tool▁output▁end>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<tool▁output▁begin>' + message['content'] + '<tool▁output▁end>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<tool▁outputs▁end>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<Assistant><think>\\n'}}{% endif %}",
        },
    )

    assert model in litellm.known_tokenizer_config

    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    client = HTTPHandler()

    messages = [
        {"role": "system", "content": "You are a good assistant"},
        {"role": "user", "content": "What is the weather in Copenhagen?"},
    ]

    with patch.object(client, "post") as mock_post:
        try:
            completion(
                model="bedrock/" + model,
                messages=messages,
                client=client,
            )
        except Exception as e:
            pass

        mock_post.assert_called_once()
        print(mock_post.call_args.kwargs)

        json_data = json.loads(mock_post.call_args.kwargs["data"])

        assert (
            json_data["prompt"].rstrip()
            == """<begin▁of▁sentence>You are a good assistant<User>What is the weather in Copenhagen?<Assistant><think>"""
        )


def test_bedrock_deepseek_known_tokenizer_config():
    model = "deepseek_r1/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re"
    from litellm.llms.custom_httpx.http_handler import HTTPHandler
    from unittest.mock import Mock
    import httpx

    mock_response = Mock(spec=httpx.Response)
    mock_response.status_code = 200
    mock_response.headers = {
        "x-amzn-bedrock-input-token-count": "20",
        "x-amzn-bedrock-output-token-count": "30",
    }

    # The response format for deepseek_r1
    response_data = {
        "generation": "The weather in Copenhagen is currently sunny with a temperature of 20°C (68°F). The forecast shows clear skies throughout the day with a gentle breeze from the northwest.",
        "stop_reason": "stop",
        "stop_sequence": None,
    }

    mock_response.json.return_value = response_data
    mock_response.text = json.dumps(response_data)

    client = HTTPHandler()

    messages = [
        {"role": "system", "content": "You are a good assistant"},
        {"role": "user", "content": "What is the weather in Copenhagen?"},
    ]

    with patch.object(client, "post", return_value=mock_response) as mock_post:
        completion(
            model="bedrock/" + model,
            messages=messages,
            client=client,
        )

        mock_post.assert_called_once()
        print(mock_post.call_args.kwargs)

        json_data = json.loads(mock_post.call_args.kwargs["data"])
        assert (
            json_data["prompt"].rstrip()
            == """<begin▁of▁sentence>You are a good assistant<User>What is the weather in Copenhagen?<Assistant><think>"""
        )

# test_replicate_custom_prompt_dict()
# commenting this out since we won't always be testing a custom, replicate deployment