Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 03:34:10 +00:00
Litellm dev 02 13 2025 p2 (#8525)
* fix(azure/chat/gpt_transformation.py): add 'prediction' as a supported azure param. Closes https://github.com/BerriAI/litellm/issues/8500
* build(model_prices_and_context_window.json): add new 'gemini-2.0-pro-exp-02-05' model
* style: clean up invalid JSON trailing comma
* feat(utils.py): support passing 'tokenizer_config' to register_prompt_template. Enables passing a model's complete tokenizer config to litellm, which allows calling deepseek on bedrock with the correct prompt template
* fix(utils.py): fix register_prompt_template for custom model names
* test(test_prompt_factory.py): fix test
* test(test_completion.py): add e2e test for a bedrock invoke deepseek fine-tuned model
* feat(base_invoke_transformation.py): support the hf_model_name param for bedrock invoke calls. Enables the proxy admin to set a base model for a fine-tuned bedrock deepseek model
* feat(bedrock/invoke): support the deepseek_r1 route for bedrock. Makes it easy to apply the right chat template to that call
* feat(constants.py): store the deepseek r1 chat template, so users get a correct response from deepseek r1 without extra work
* test(test_completion.py): add an e2e mock test for bedrock deepseek
* docs(bedrock.md): document the new deepseek_r1 route for bedrock, which lets us use the right config
* fix(exception_mapping_utils.py): catch read operation timeout
parent be31a5f7b4 · commit e33543ae4f
15 changed files with 444 additions and 39 deletions
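The headline change is that register_prompt_template can now take a full tokenizer_config. A hedged usage sketch: the keyword name comes straight from the commit message, the dict shape mirrors the known_tokenizer_config entries in the diff below, and the model name is hypothetical.

```python
import litellm

# Hedged sketch: 'tokenizer_config' is the parameter named in the commit
# message; the dict shape mirrors the known_tokenizer_config entries in
# the diff below. The model name is hypothetical.
litellm.register_prompt_template(
    model="bedrock/my-imported-deepseek-model",
    tokenizer_config={
        "tokenizer": {
            "chat_template": "{{ bos_token }}{% for message in messages %}{{ message['content'] }}{% endfor %}",
            "bos_token": "<s>",
            "eos_token": "</s>",
        },
        "status": "success",
    },
)
```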
exception_mapping_utils.py:

```diff
@@ -223,6 +223,7 @@ def exception_type(  # type: ignore  # noqa: PLR0915
             "Request Timeout Error" in error_str
             or "Request timed out" in error_str
             or "Timed out generating response" in error_str
+            or "The read operation timed out" in error_str
         ):
             exception_mapping_worked = True
```
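The added branch just extends a set of substring checks against provider error messages. A standalone sketch of the pattern (not litellm's actual code path, which maps a matching error to its Timeout exception):

```python
def looks_like_timeout(error_str: str) -> bool:
    # Mirrors the substring checks in the hunk above; when one matches,
    # exception_type treats the provider error as a timeout.
    timeout_phrases = (
        "Request Timeout Error",
        "Request timed out",
        "Timed out generating response",
        "The read operation timed out",  # the phrase this commit adds
    )
    return any(phrase in error_str for phrase in timeout_phrases)


print(looks_like_timeout("The read operation timed out"))  # True
```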
factory.py (prompt templates):

```diff
@@ -325,26 +325,6 @@ def phind_codellama_pt(messages):
     return prompt
 
 
-known_tokenizer_config = {
-    "mistralai/Mistral-7B-Instruct-v0.1": {
-        "tokenizer": {
-            "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
-            "bos_token": "<s>",
-            "eos_token": "</s>",
-        },
-        "status": "success",
-    },
-    "meta-llama/Meta-Llama-3-8B-Instruct": {
-        "tokenizer": {
-            "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
-            "bos_token": "<|begin_of_text|>",
-            "eos_token": "",
-        },
-        "status": "success",
-    },
-}
-
-
 def hf_chat_template(  # noqa: PLR0915
     model: str, messages: list, chat_template: Optional[Any] = None
 ):
```
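The dict above is removed because it moves to the litellm module (see the next hunk). For context, each entry is a Hugging Face-style tokenizer config whose chat_template is a Jinja2 string. A minimal rendering sketch, using a simplified Mistral template and supplying the raise_exception helper the way HF-style templates expect; this is an illustration, not litellm's internal renderer:

```python
from jinja2 import Environment


def raise_exception(message):
    # HF-style chat templates call raise_exception() on invalid input;
    # Jinja2 has no such builtin, so the renderer must provide it.
    raise ValueError(message)


env = Environment()
env.globals["raise_exception"] = raise_exception

# Simplified version of the Mistral template in the hunk above
# (the role-alternation check is omitted for brevity).
mistral_template = (
    "{{ bos_token }}{% for message in messages %}"
    "{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}"
    "{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}"
    "{% endif %}{% endfor %}"
)

prompt = env.from_string(mistral_template).render(
    bos_token="<s>",
    eos_token="</s>",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(prompt)  # <s>[INST] Hello! [/INST]
```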
```diff
@@ -378,11 +358,11 @@ def hf_chat_template(  # noqa: PLR0915
         else:
             return {"status": "failure"}
 
-    if model in known_tokenizer_config:
-        tokenizer_config = known_tokenizer_config[model]
+    if model in litellm.known_tokenizer_config:
+        tokenizer_config = litellm.known_tokenizer_config[model]
     else:
         tokenizer_config = _get_tokenizer_config(model)
-        known_tokenizer_config.update({model: tokenizer_config})
+        litellm.known_tokenizer_config.update({model: tokenizer_config})
 
     if (
         tokenizer_config["status"] == "failure"
```
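This hunk swaps the factory-local cache for litellm.known_tokenizer_config, so templates registered via register_prompt_template and configs fetched lazily share one store. A stripped-down sketch of the read-through pattern, with a plain dict standing in for the litellm attribute:

```python
from typing import Any, Dict

# Stand-in for litellm.known_tokenizer_config.
known_tokenizer_config: Dict[str, Dict[str, Any]] = {}


def _get_tokenizer_config(model: str) -> Dict[str, Any]:
    # Placeholder for the real fetch of the model's tokenizer config.
    return {"status": "failure"}


def lookup(model: str) -> Dict[str, Any]:
    if model in known_tokenizer_config:
        return known_tokenizer_config[model]
    config = _get_tokenizer_config(model)
    # The diff caches the result unconditionally, failures included.
    known_tokenizer_config[model] = config
    return config
```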
```diff
@@ -475,6 +455,12 @@ def hf_chat_template(  # noqa: PLR0915
         )  # don't use verbose_logger.exception, if exception is raised
 
 
+def deepseek_r1_pt(messages):
+    return hf_chat_template(
+        model="deepseek-r1/deepseek-r1-7b-instruct", messages=messages
+    )
+
+
 # Anthropic template
 def claude_2_1_pt(
     messages: list,
```
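With deepseek_r1_pt in place, the new bedrock deepseek_r1 route can apply the right chat template automatically. A hedged end-to-end sketch; the model-string format and the ARN are illustrative assumptions based on the route name in the commit message, so check bedrock.md from this commit for the authoritative form:

```python
import litellm

# Hypothetical imported-model ARN; the 'bedrock/deepseek_r1/...' prefix is
# an assumption based on the deepseek_r1 route this commit introduces.
response = litellm.completion(
    model="bedrock/deepseek_r1/arn:aws:bedrock:us-east-1:123456789012:imported-model/example",
    messages=[{"role": "user", "content": "Hello from Bedrock"}],
)
print(response.choices[0].message.content)
```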