[BUGFIX] Fix #9199 - incorrect api token parameter in create_pretrained_tokenizer

Fix the `Tokenizer.from_pretrained` call to use the correct keyword argument for the API token (`token` instead of `auth_token`).
Do not pass the API token argument to `Tokenizer.from_pretrained` at all if the token is empty or `None`.
This commit is contained in:
Vladislav Vinogradov 2025-03-31 09:58:33 +03:00 committed by Vladislav Vinogradov
parent 33ead69c0a
commit 4324b0c142
2 changed files with 21 additions and 8 deletions

View file

@@ -1705,15 +1705,18 @@ def create_pretrained_tokenizer(
dict: A dictionary with the tokenizer and its type.
"""
try:
tokenizer = Tokenizer.from_pretrained(
identifier, revision=revision, auth_token=auth_token # type: ignore
)
except Exception as e:
verbose_logger.error(
f"Error creating pretrained tokenizer: {e}. Defaulting to version without 'auth_token'."
)
if not auth_token:
tokenizer = Tokenizer.from_pretrained(identifier, revision=revision)
else:
try:
tokenizer = Tokenizer.from_pretrained(
identifier, revision=revision, token=auth_token # type: ignore
)
except Exception as e:
verbose_logger.error(
f"Error creating pretrained tokenizer: {e}. Defaulting to version without 'token'."
)
tokenizer = Tokenizer.from_pretrained(identifier, revision=revision)
return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}