Doc updates + management endpoint fixes (#8138)

* Litellm dev 01 29 2025 p4 (#8107)

* fix(key_management_endpoints.py): always get db team

Fixes https://github.com/BerriAI/litellm/issues/7983

* test(test_key_management.py): add unit test enforcing check_db_only is always true on key-generation checks

* test: fix test

* test: skip gemini thinking

* Litellm dev 01 29 2025 p3 (#8106)

* fix(__init__.py): reduce the size of __init__.py and the scope for errors by using the correct param

* refactor(__init__.py): refactor init by cleaning up redundant params

* refactor(__init__.py): move more constants into constants.py

cleanup root

* refactor(__init__.py): more cleanup

* feat(__init__.py): expose new 'disable_hf_tokenizer_download' param

enables HF model usage in offline environments (see the usage sketch after this list)

* docs(config_settings.md): document new disable_hf_tokenizer_download param

* fix: fix linting error

* fix: fix unsafe comparison

* test: fix test

* docs(public_teams.md): add doc showing how to expose public teams for users to join

* docs: add beta disclaimer on public teams

* test: update tests
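
For reference, a minimal usage sketch of the new param (the flag name comes from this PR; the model name and text are arbitrary examples, not anything prescribed by the change):

import litellm

# Illustrative: run in an air-gapped environment without fetching tokenizer
# files from the Hugging Face Hub. With the flag set, litellm falls back to
# its bundled tiktoken encoding.
litellm.disable_hf_tokenizer_download = True

# token_counter() now resolves to the tiktoken default instead of attempting
# a Tokenizer.from_pretrained(...) download.
n = litellm.token_counter(model="llama-3-8b", text="hello world")
print(n)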
Commit de261e2120 (parent 69a6da4727)
Author: Krish Dholakia, 2025-01-30 22:56:41 -08:00, committed by GitHub
16 changed files with 428 additions and 349 deletions


@@ -150,6 +150,7 @@ from litellm.types.utils import (
     ModelResponseStream,
     ProviderField,
     ProviderSpecificModelInfo,
+    SelectTokenizerResponse,
     StreamingChoices,
     TextChoices,
     TextCompletionResponse,
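
The newly imported SelectTokenizerResponse is the return type used in the hunk below. Its actual definition lives in litellm/types/utils.py and isn't shown in this diff; a minimal sketch inferred from the {"type": ..., "tokenizer": ...} dicts returned below might look like:

# Hypothetical reconstruction of SelectTokenizerResponse, inferred from the
# return values in the diff; the real definition in litellm/types/utils.py
# may differ.
from typing import Any, Literal, TypedDict

class SelectTokenizerResponse(TypedDict):
    # "openai_tokenizer" pairs with a tiktoken Encoding;
    # "huggingface_tokenizer" pairs with a tokenizers.Tokenizer
    type: Literal["openai_tokenizer", "huggingface_tokenizer"]
    tokenizer: Any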
@@ -1440,34 +1441,47 @@ def _select_tokenizer(
 @lru_cache(maxsize=128)
-def _select_tokenizer_helper(model: str):
+def _select_tokenizer_helper(model: str) -> SelectTokenizerResponse:
+    if litellm.disable_hf_tokenizer_download is True:
+        return _return_openai_tokenizer(model)
+
     try:
-        if model in litellm.cohere_models and "command-r" in model:
-            # cohere
-            cohere_tokenizer = Tokenizer.from_pretrained(
-                "Xenova/c4ai-command-r-v01-tokenizer"
-            )
-            return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
-        # anthropic
-        elif model in litellm.anthropic_models and "claude-3" not in model:
-            claude_tokenizer = Tokenizer.from_str(claude_json_str)
-            return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
-        # llama2
-        elif "llama-2" in model.lower() or "replicate" in model.lower():
-            tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
-            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
-        # llama3
-        elif "llama-3" in model.lower():
-            tokenizer = Tokenizer.from_pretrained("Xenova/llama-3-tokenizer")
-            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+        result = _return_huggingface_tokenizer(model)
+        if result is not None:
+            return result
     except Exception as e:
         verbose_logger.debug(f"Error selecting tokenizer: {e}")
 
     # default - tiktoken
-    return {
-        "type": "openai_tokenizer",
-        "tokenizer": encoding,
-    }  # default to openai tokenizer
+    return _return_openai_tokenizer(model)
+
+
+def _return_openai_tokenizer(model: str) -> SelectTokenizerResponse:
+    return {"type": "openai_tokenizer", "tokenizer": encoding}
+
+
+def _return_huggingface_tokenizer(model: str) -> Optional[SelectTokenizerResponse]:
+    if model in litellm.cohere_models and "command-r" in model:
+        # cohere
+        cohere_tokenizer = Tokenizer.from_pretrained(
+            "Xenova/c4ai-command-r-v01-tokenizer"
+        )
+        return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
+    # anthropic
+    elif model in litellm.anthropic_models and "claude-3" not in model:
+        claude_tokenizer = Tokenizer.from_str(claude_json_str)
+        return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
+    # llama2
+    elif "llama-2" in model.lower() or "replicate" in model.lower():
+        tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
+        return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+    # llama3
+    elif "llama-3" in model.lower():
+        tokenizer = Tokenizer.from_pretrained("Xenova/llama-3-tokenizer")
+        return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+    else:
+        return None
 
 
 def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
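
A quick illustration of the resulting control flow, calling the internal helper directly (model names are hypothetical; note the helper is lru_cache'd, so each model string is resolved once and the flag should be set before the first call for a given model):

import litellm
from litellm.utils import _select_tokenizer_helper

# Unrecognized models fall through _return_huggingface_tokenizer (it returns
# None) and land on the tiktoken default.
resp = _select_tokenizer_helper("my-custom-model")
assert resp["type"] == "openai_tokenizer"

# With the new flag set, even recognized families skip the Hugging Face path.
litellm.disable_hf_tokenizer_download = True
resp = _select_tokenizer_helper("llama-3-70b")
assert resp["type"] == "openai_tokenizer"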