Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 10:44:24 +00:00
Squashed commit of the following: (#9709)

commit b12a9892b7
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed Apr 2 08:09:56 2025 -0700
    fix(utils.py): don't modify openai_token_counter

commit 294de31803
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon Mar 24 21:22:40 2025 -0700
    fix: fix linting error

commit cb6e9fbe40
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon Mar 24 19:52:45 2025 -0700
    refactor: complete migration

commit bfc159172d
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon Mar 24 19:09:59 2025 -0700
    refactor: refactor more constants

commit 43ffb6a558
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon Mar 24 18:45:24 2025 -0700
    fix: test

commit 04dbe4310c
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon Mar 24 18:28:58 2025 -0700
    refactor: move more constants into constants.py

commit 3c26284aff
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon Mar 24 18:14:46 2025 -0700
    refactor: migrate hardcoded constants out of __init__.py

commit c11e0de69d
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon Mar 24 18:11:21 2025 -0700
    build: migrate all constants into constants.py

commit 7882bdc787
Author: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon Mar 24 18:07:37 2025 -0700
    build: initial test banning hardcoded numbers in repo
This commit is contained in: commit 354a75fb59 (parent 3911187129)

51 changed files with 509 additions and 118 deletions
@@ -62,6 +62,16 @@ import litellm.llms.gemini
 from litellm.caching._internal_lru_cache import lru_cache_wrapper
 from litellm.caching.caching import DualCache
 from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler
+from litellm.constants import (
+    DEFAULT_MAX_LRU_CACHE_SIZE,
+    DEFAULT_TRIM_RATIO,
+    FUNCTION_DEFINITION_TOKEN_COUNT,
+    INITIAL_RETRY_DELAY,
+    JITTER,
+    MAX_RETRY_DELAY,
+    MINIMUM_PROMPT_CACHE_TOKEN_COUNT,
+    TOOL_CHOICE_OBJECT_TOKEN_COUNT,
+)
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.core_helpers import (
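
For context, a minimal sketch of the corresponding litellm/constants.py entries, using only the values made visible by the literals replaced in the hunks of this file; names whose values do not appear in these hunks (FUNCTION_DEFINITION_TOKEN_COUNT, JITTER, TOOL_CHOICE_OBJECT_TOKEN_COUNT) are omitted rather than guessed:

# litellm/constants.py (sketch; the real file defines many more constants)

# maxsize previously hardcoded as 128 on _select_tokenizer_helper's lru_cache
DEFAULT_MAX_LRU_CACHE_SIZE = 128

# default trim_ratio previously hardcoded as 0.75 in trim_messages()
DEFAULT_TRIM_RATIO = 0.75

# retry/backoff values previously hardcoded in _calculate_retry_after()
INITIAL_RETRY_DELAY = 0.5
MAX_RETRY_DELAY = 8.0

# prompt-caching threshold previously hardcoded as 1024 tokens
MINIMUM_PROMPT_CACHE_TOKEN_COUNT = 1024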
@@ -1520,7 +1530,7 @@ def _select_tokenizer(
     return _select_tokenizer_helper(model=model)
 
 
-@lru_cache(maxsize=128)
+@lru_cache(maxsize=DEFAULT_MAX_LRU_CACHE_SIZE)
 def _select_tokenizer_helper(model: str) -> SelectTokenizerResponse:
     if litellm.disable_hf_tokenizer_download is True:
         return _return_openai_tokenizer(model)
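
The decorator change above only swaps a literal for the named constant. A tiny standalone sketch of the functools.lru_cache pattern in use, with a hypothetical function standing in for _select_tokenizer_helper:

from functools import lru_cache

@lru_cache(maxsize=128)  # bounded LRU cache: at most 128 distinct model keys retained
def select_tokenizer_for(model: str) -> str:
    # hypothetical stand-in for the tokenizer lookup done by _select_tokenizer_helper
    return f"tokenizer-for-{model}"

select_tokenizer_for("gpt-4")             # miss: computed and cached
select_tokenizer_for("gpt-4")             # hit: served from the cache
print(select_tokenizer_for.cache_info())  # CacheInfo(hits=1, misses=1, maxsize=128, currsize=1)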
@@ -5336,15 +5346,15 @@ def _calculate_retry_after(
     if retry_after is not None and 0 < retry_after <= 60:
         return retry_after
 
-    initial_retry_delay = 0.5
-    max_retry_delay = 8.0
+    initial_retry_delay = INITIAL_RETRY_DELAY
+    max_retry_delay = MAX_RETRY_DELAY
     nb_retries = max_retries - remaining_retries
 
     # Apply exponential backoff, but not more than the max.
     sleep_seconds = min(initial_retry_delay * pow(2.0, nb_retries), max_retry_delay)
 
     # Apply some jitter, plus-or-minus half a second.
-    jitter = 1 - 0.25 * random.random()
+    jitter = JITTER * random.random()
     timeout = sleep_seconds * jitter
     return timeout if timeout >= min_timeout else min_timeout
 
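
A self-contained sketch of the backoff computation in the hunk above, assuming the values of the removed literals for the first two constants and an illustrative value for JITTER, whose definition is not shown in this diff:

import random

# 0.5 and 8.0 are the literals removed in this hunk; JITTER = 0.75 is illustrative only,
# since its real value lives in litellm/constants.py and is not shown here.
INITIAL_RETRY_DELAY = 0.5
MAX_RETRY_DELAY = 8.0
JITTER = 0.75

def backoff_seconds(max_retries: int, remaining_retries: int, min_timeout: float = 0.0) -> float:
    nb_retries = max_retries - remaining_retries
    # Exponential backoff capped at the maximum: 0.5, 1.0, 2.0, 4.0, 8.0, 8.0, ...
    sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
    # Random jitter scales the delay so concurrent clients do not retry in lockstep
    timeout = sleep_seconds * (JITTER * random.random())
    return timeout if timeout >= min_timeout else min_timeout

print(backoff_seconds(max_retries=5, remaining_retries=3))  # second retry: at most 2.0 * JITTER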
@@ -5670,7 +5680,7 @@ def shorten_message_to_fit_limit(message, tokens_needed, model: Optional[str]):
 def trim_messages(
     messages,
     model: Optional[str] = None,
-    trim_ratio: float = 0.75,
+    trim_ratio: float = DEFAULT_TRIM_RATIO,
     return_response_tokens: bool = False,
     max_tokens=None,
 ):
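
A hypothetical sketch of how a trim ratio like DEFAULT_TRIM_RATIO is typically applied, budgeting a fraction of the context window for the prompt; the helper name is illustrative, not litellm's API:

# `message_token_budget` and `context_window` are illustrative names, not litellm identifiers.
DEFAULT_TRIM_RATIO = 0.75  # value implied by the default removed in this hunk

def message_token_budget(context_window: int, trim_ratio: float = DEFAULT_TRIM_RATIO) -> int:
    # Keep trim_ratio of the window for the prompt, leaving the rest for the completion.
    return int(context_window * trim_ratio)

print(message_token_budget(8192))       # 6144 tokens of prompt budget
print(message_token_budget(8192, 0.5))  # 4096 tokens when trimming more aggressively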
@@ -6543,7 +6553,7 @@ def is_prompt_caching_valid_prompt(
             model=model,
             use_default_image_token_count=True,
         )
-        return token_count >= 1024
+        return token_count >= MINIMUM_PROMPT_CACHE_TOKEN_COUNT
     except Exception as e:
         verbose_logger.error(f"Error in is_prompt_caching_valid_prompt: {e}")
         return False
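
A minimal sketch of the eligibility rule the hunk above encodes, with a hypothetical helper in place of litellm's token_counter-based check:

MINIMUM_PROMPT_CACHE_TOKEN_COUNT = 1024  # value taken from the literal removed in this hunk

def prompt_long_enough_to_cache(token_count: int) -> bool:
    # Providers only cache sufficiently long prompts, so short prompts are rejected up front.
    return token_count >= MINIMUM_PROMPT_CACHE_TOKEN_COUNT

print(prompt_long_enough_to_cache(900))   # False: below the caching threshold
print(prompt_long_enough_to_cache(2048))  # True: long enough to qualify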