Litellm dev 02 13 2025 p2 (#8525)

* fix(azure/chat/gpt_transformation.py): add 'prediction' as a supported azure param (usage sketch below)

Closes https://github.com/BerriAI/litellm/issues/8500
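
A minimal usage sketch (not part of this commit's diff): passing the OpenAI-style predicted-outputs param through litellm to an Azure deployment. The deployment name and prediction content are placeholders.

    import litellm

    response = litellm.completion(
        model="azure/my-gpt-4o-deployment",  # placeholder deployment name
        messages=[{"role": "user", "content": "Update this function to return JSON."}],
        # 'prediction' follows the OpenAI predicted-outputs shape; it is now accepted as an azure param
        prediction={"type": "content", "content": "def handler():\n    return {}"},
    )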

* build(model_prices_and_context_window.json): add new 'gemini-2.0-pro-exp-02-05' model

* style: cleanup invalid json trailing comma

* feat(utils.py): support passing 'tokenizer_config' to register_prompt_template

Enables passing the complete tokenizer config of a model to litellm.

Allows calling DeepSeek on Bedrock with the correct prompt template (see the sketch below).
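
A minimal sketch of the new flow, assuming the tokenizer_config argument takes a dict with chat_template / bos_token / eos_token (mirroring the known_tokenizer_config entries in the diff below); the model name and template here are placeholders.

    import litellm

    litellm.register_prompt_template(
        model="my-bedrock-deepseek-ft",  # placeholder custom model name
        tokenizer_config={
            # placeholder template; a real setup would paste the model's full chat_template
            "chat_template": "{% for m in messages %}{{ m['content'] }}{% endfor %}",
            "bos_token": "<s>",
            "eos_token": "</s>",
        },
    )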

* fix(utils.py): fix register_prompt_template for custom model names

* test(test_prompt_factory.py): fix test

* test(test_completion.py): add e2e test for bedrock invoke deepseek ft model

* feat(base_invoke_transformation.py): support hf_model_name param for bedrock invoke calls

Enables the proxy admin to set the base model for a fine-tuned Bedrock DeepSeek model (see the sketch below).
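
A hedged sketch of the intended call (exact parameter placement is an assumption from this commit's title); the ARN and base-model id are placeholders.

    import litellm

    response = litellm.completion(
        model="bedrock/invoke/arn:aws:bedrock:us-east-1:111122223333:imported-model/example",  # placeholder ARN
        hf_model_name="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",  # assumed HF base model of the fine-tune
        messages=[{"role": "user", "content": "Hello"}],
    )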

* feat(bedrock/invoke): support deepseek_r1 route for bedrock

Makes it easy to apply the right chat template to that call (see the sketch below).
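
A hedged sketch, assuming the new route is addressed as bedrock/deepseek_r1/<model-id-or-arn> so the DeepSeek R1 chat template is applied automatically; the model id is a placeholder.

    import litellm

    response = litellm.completion(
        model="bedrock/deepseek_r1/us.deepseek.r1-v1:0",  # placeholder model id on the new route
        messages=[{"role": "user", "content": "Hello"}],
    )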

* feat(constants.py): store the deepseek r1 chat template - allows users to get a correct response from deepseek r1 without extra work

* test(test_completion.py): add e2e mock test for bedrock deepseek

* docs(bedrock.md): document new deepseek_r1 route for bedrock

allows us to use the right config

* fix(exception_mapping_utils.py): catch read operation timeout
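
A small sketch of the intended effect: provider errors whose message contains "The read operation timed out" should now surface as litellm.Timeout; the model name is a placeholder.

    import litellm

    try:
        litellm.completion(
            model="azure/my-gpt-4o-deployment",  # placeholder deployment name
            messages=[{"role": "user", "content": "Hello"}],
            timeout=0.001,  # small enough to trip a client-side read timeout
        )
    except litellm.Timeout as err:
        # read-operation timeouts from the underlying client now map here
        print(f"request timed out: {err}")
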
Krish Dholakia authored on 2025-02-13 20:28:42 -08:00, committed by GitHub
parent be31a5f7b4
commit e33543ae4f
15 changed files with 444 additions and 39 deletions


@@ -223,6 +223,7 @@ def exception_type( # type: ignore # noqa: PLR0915
"Request Timeout Error" in error_str
or "Request timed out" in error_str
or "Timed out generating response" in error_str
or "The read operation timed out" in error_str
):
exception_mapping_worked = True


@@ -325,26 +325,6 @@ def phind_codellama_pt(messages):
return prompt
-known_tokenizer_config = {
-    "mistralai/Mistral-7B-Instruct-v0.1": {
-        "tokenizer": {
-            "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
-            "bos_token": "<s>",
-            "eos_token": "</s>",
-        },
-        "status": "success",
-    },
-    "meta-llama/Meta-Llama-3-8B-Instruct": {
-        "tokenizer": {
-            "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
-            "bos_token": "<|begin_of_text|>",
-            "eos_token": "",
-        },
-        "status": "success",
-    },
-}
def hf_chat_template( # noqa: PLR0915
model: str, messages: list, chat_template: Optional[Any] = None
):
@@ -378,11 +358,11 @@ def hf_chat_template( # noqa: PLR0915
else:
return {"status": "failure"}
-    if model in known_tokenizer_config:
-        tokenizer_config = known_tokenizer_config[model]
+    if model in litellm.known_tokenizer_config:
+        tokenizer_config = litellm.known_tokenizer_config[model]
    else:
        tokenizer_config = _get_tokenizer_config(model)
-        known_tokenizer_config.update({model: tokenizer_config})
+        litellm.known_tokenizer_config.update({model: tokenizer_config})
if (
tokenizer_config["status"] == "failure"
@@ -475,6 +455,12 @@ def hf_chat_template( # noqa: PLR0915
) # don't use verbose_logger.exception, if exception is raised
+def deepseek_r1_pt(messages):
+    return hf_chat_template(
+        model="deepseek-r1/deepseek-r1-7b-instruct", messages=messages
+    )
# Anthropic template
def claude_2_1_pt(
messages: list,