mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00
[BUGFIX] Fix #9199 - incorrect API token parameter in create_pretrained_tokenizer
Fix the `Tokenizer.from_pretrained` call and use the correct parameter name for the API token. Do not call `Tokenizer.from_pretrained` with the API token parameter if it is empty or `None`.
This commit is contained in:
parent
33ead69c0a
commit
4324b0c142
2 changed files with 21 additions and 8 deletions
|
@ -1705,15 +1705,18 @@ def create_pretrained_tokenizer(
|
|||
dict: A dictionary with the tokenizer and its type.
|
||||
"""
|
||||
|
||||
try:
|
||||
tokenizer = Tokenizer.from_pretrained(
|
||||
identifier, revision=revision, auth_token=auth_token # type: ignore
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_logger.error(
|
||||
f"Error creating pretrained tokenizer: {e}. Defaulting to version without 'auth_token'."
|
||||
)
|
||||
if not auth_token:
|
||||
tokenizer = Tokenizer.from_pretrained(identifier, revision=revision)
|
||||
else:
|
||||
try:
|
||||
tokenizer = Tokenizer.from_pretrained(
|
||||
identifier, revision=revision, token=auth_token # type: ignore
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_logger.error(
|
||||
f"Error creating pretrained tokenizer: {e}. Defaulting to version without 'token'."
|
||||
)
|
||||
tokenizer = Tokenizer.from_pretrained(identifier, revision=revision)
|
||||
return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
|
||||
|
||||
|
||||
|
|
|
@ -151,6 +151,16 @@ def test_tokenizers():
|
|||
llama3_tokens_1 == llama3_tokens_2
|
||||
), "Custom tokenizer is not being used! It has been configured to use the same tokenizer as the built in llama3 tokenizer and the results should be the same."
|
||||
|
||||
if hf_api_key := os.getenv("HUGGINGFACE_API_KEY"):
|
||||
private_tokenizer = create_pretrained_tokenizer(
|
||||
"meta-llama/Llama-3.1-70B", auth_token=hf_api_key
|
||||
)
|
||||
private_tokens = token_counter(
|
||||
custom_tokenizer=private_tokenizer, text=sample_text
|
||||
)
|
||||
print(f"private model tokens: {private_tokens}")
|
||||
assert private_tokens > 0
|
||||
|
||||
print("test tokenizer: It worked!")
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occured: {e}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue