Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
Doc updates + management endpoint fixes (#8138)
* Litellm dev 01 29 2025 p4 (#8107)
* fix(key_management_endpoints.py): always get db team. Fixes https://github.com/BerriAI/litellm/issues/7983
* test(test_key_management.py): add unit test enforcing check_db_only is always true on key generate checks
* test: fix test
* test: skip gemini thinking
* Litellm dev 01 29 2025 p3 (#8106)
* fix(__init__.py): reduces size of __init__.py and reduces scope for errors by using correct param
* refactor(__init__.py): refactor init by cleaning up redundant params
* refactor(__init__.py): move more constants into constants.py, cleanup root
* refactor(__init__.py): more cleanup
* feat(__init__.py): expose new 'disable_hf_tokenizer_download' param, enables hf model usage in offline env
* docs(config_settings.md): document new disable_hf_tokenizer_download param
* fix: fix linting error
* fix: fix unsafe comparison
* test: fix test
* docs(public_teams.md): add doc showing how to expose public teams for users to join
* docs: add beta disclaimer on public teams
* test: update tests
This commit is contained in:
parent 69a6da4727, commit de261e2120
16 changed files with 428 additions and 349 deletions
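The feat item above exposes a new module-level flag, litellm.disable_hf_tokenizer_download, so that token counting never triggers a Hugging Face tokenizer download (useful in offline environments). A minimal sketch of how a caller might opt in; the flag name comes from this commit, while the model name and the use of litellm.token_counter are illustrative:

    # Sketch: keep tokenizer selection fully offline by forcing the tiktoken/OpenAI
    # fallback instead of downloading a model-specific tokenizer from Hugging Face.
    # The flag is added in this commit; the surrounding usage is illustrative.
    import litellm

    litellm.disable_hf_tokenizer_download = True

    # Token counting now uses the default OpenAI tokenizer even for llama-3-style
    # model names that would otherwise fetch "Xenova/llama-3-tokenizer".
    n_tokens = litellm.token_counter(
        model="llama-3-8b-instruct",
        messages=[{"role": "user", "content": "Hello, world!"}],
    )
    print(n_tokens)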
@@ -150,6 +150,7 @@ from litellm.types.utils import (
     ModelResponseStream,
     ProviderField,
     ProviderSpecificModelInfo,
+    SelectTokenizerResponse,
     StreamingChoices,
     TextChoices,
     TextCompletionResponse,
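The only change in this hunk is importing SelectTokenizerResponse, the return type used by the refactored helpers in the next hunk. Judging from the return statements below it is a small typed dict; a rough sketch of its shape (the real definition lives in litellm.types.utils and may differ):

    # Rough sketch of SelectTokenizerResponse, inferred from the return values in
    # the diff below; the actual definition in litellm/types/utils.py may differ.
    from typing import Any, Literal, TypedDict


    class SelectTokenizerResponse(TypedDict):
        type: Literal["openai_tokenizer", "huggingface_tokenizer"]
        tokenizer: Any  # a tiktoken Encoding or a Hugging Face tokenizers.Tokenizer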
@ -1440,34 +1441,47 @@ def _select_tokenizer(
|
|||
|
||||
|
||||
@lru_cache(maxsize=128)
|
||||
def _select_tokenizer_helper(model: str):
|
||||
def _select_tokenizer_helper(model: str) -> SelectTokenizerResponse:
|
||||
|
||||
if litellm.disable_hf_tokenizer_download is True:
|
||||
return _return_openai_tokenizer(model)
|
||||
|
||||
try:
|
||||
if model in litellm.cohere_models and "command-r" in model:
|
||||
# cohere
|
||||
cohere_tokenizer = Tokenizer.from_pretrained(
|
||||
"Xenova/c4ai-command-r-v01-tokenizer"
|
||||
)
|
||||
return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
|
||||
# anthropic
|
||||
elif model in litellm.anthropic_models and "claude-3" not in model:
|
||||
claude_tokenizer = Tokenizer.from_str(claude_json_str)
|
||||
return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
|
||||
# llama2
|
||||
elif "llama-2" in model.lower() or "replicate" in model.lower():
|
||||
tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
|
||||
return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
|
||||
# llama3
|
||||
elif "llama-3" in model.lower():
|
||||
tokenizer = Tokenizer.from_pretrained("Xenova/llama-3-tokenizer")
|
||||
return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
|
||||
result = _return_huggingface_tokenizer(model)
|
||||
if result is not None:
|
||||
return result
|
||||
except Exception as e:
|
||||
verbose_logger.debug(f"Error selecting tokenizer: {e}")
|
||||
|
||||
# default - tiktoken
|
||||
return {
|
||||
"type": "openai_tokenizer",
|
||||
"tokenizer": encoding,
|
||||
} # default to openai tokenizer
|
||||
return _return_openai_tokenizer(model)
|
||||
|
||||
|
||||
def _return_openai_tokenizer(model: str) -> SelectTokenizerResponse:
|
||||
return {"type": "openai_tokenizer", "tokenizer": encoding}
|
||||
|
||||
|
||||
def _return_huggingface_tokenizer(model: str) -> Optional[SelectTokenizerResponse]:
|
||||
if model in litellm.cohere_models and "command-r" in model:
|
||||
# cohere
|
||||
cohere_tokenizer = Tokenizer.from_pretrained(
|
||||
"Xenova/c4ai-command-r-v01-tokenizer"
|
||||
)
|
||||
return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
|
||||
# anthropic
|
||||
elif model in litellm.anthropic_models and "claude-3" not in model:
|
||||
claude_tokenizer = Tokenizer.from_str(claude_json_str)
|
||||
return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
|
||||
# llama2
|
||||
elif "llama-2" in model.lower() or "replicate" in model.lower():
|
||||
tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
|
||||
return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
|
||||
# llama3
|
||||
elif "llama-3" in model.lower():
|
||||
tokenizer = Tokenizer.from_pretrained("Xenova/llama-3-tokenizer")
|
||||
return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
|
||||
|
|
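After the refactor, selection happens in three steps: honor disable_hf_tokenizer_download, otherwise try the model-specific Hugging Face tokenizers via _return_huggingface_tokenizer, and fall back to the default tiktoken encoding on a miss or any exception. A hedged usage sketch of that behavior; _select_tokenizer_helper is a private helper in litellm/utils.py, so the import path and model strings are assumptions, not a stable API:

    # Sketch of the post-refactor behavior; not a stable API.
    import litellm
    from litellm.utils import _select_tokenizer_helper

    # llama-3-style names resolve to a Hugging Face tokenizer
    # (requires network access to fetch "Xenova/llama-3-tokenizer").
    resp = _select_tokenizer_helper("llama-3-8b-instruct")
    print(resp["type"])  # expected: "huggingface_tokenizer"

    # With the new flag set, no download is attempted and the OpenAI/tiktoken
    # encoding is returned. A different model string is used here because the
    # helper is memoized with @lru_cache, so earlier results are cached per model.
    litellm.disable_hf_tokenizer_download = True
    resp = _select_tokenizer_helper("llama-3-70b-instruct")
    print(resp["type"])  # expected: "openai_tokenizer"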