Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
refactor: refactor: move more constants into constants.py
Commit 04dbe4310c (parent 3c26284aff)
7 changed files with 99 additions and 1115 deletions
@@ -18,9 +18,22 @@ DEFAULT_IMAGE_HEIGHT = 300
 DEFAULT_MAX_TOKENS = 256  # used when providers need a default
 MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024  # 1MB = 1024KB
 SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
+MINIMUM_PROMPT_CACHE_TOKEN_COUNT = (
+    1024  # minimum number of tokens to cache a prompt by Anthropic
+)
+DEFAULT_TRIM_RATIO = 0.75  # default ratio of tokens to trim from the end of a prompt
+#### TOKEN COUNTING ####
+FUNCTION_DEFINITION_TOKEN_COUNT = 9
+SYSTEM_MESSAGE_TOKEN_COUNT = 4
+TOOL_CHOICE_OBJECT_TOKEN_COUNT = 4
 #### RELIABILITY ####
 REPEATED_STREAMING_CHUNK_LIMIT = 100  # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
 DEFAULT_MAX_LRU_CACHE_SIZE = 16
+INITIAL_RETRY_DELAY = 0.5
+MAX_RETRY_DELAY = 8.0
+JITTER = 0.75
+DEFAULT_IN_MEMORY_TTL = 5  # default time to live for the in-memory cache
+DEFAULT_POLLING_INTERVAL = 0.03  # default polling interval for the scheduler
 #### Networking settings ####
 request_timeout: float = 6000  # time in seconds
 STREAM_SSE_DONE_STRING: str = "[DONE]"
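For orientation, a minimal sketch (not part of the diff) of how downstream modules are expected to consume these names from litellm.constants instead of repeating the literals:

from litellm.constants import (
    DEFAULT_MAX_TOKENS,
    INITIAL_RETRY_DELAY,
    JITTER,
)

# Values as defined in the hunk above.
print(DEFAULT_MAX_TOKENS, INITIAL_RETRY_DELAY, JITTER)  # 256 0.5 0.75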
@@ -281,7 +281,7 @@ from litellm.router import (
     LiteLLM_Params,
     ModelGroupInfo,
 )
-from litellm.scheduler import DefaultPriorities, FlowItem, Scheduler
+from litellm.scheduler import FlowItem, Scheduler
 from litellm.secret_managers.aws_secret_manager import load_aws_kms
 from litellm.secret_managers.google_kms import load_google_kms
 from litellm.secret_managers.main import (
@@ -301,6 +301,7 @@ from litellm.types.llms.openai import HttpxBinaryResponseContent
 from litellm.types.router import DeploymentTypedDict
 from litellm.types.router import ModelInfo as RouterModelInfo
 from litellm.types.router import RouterGeneralSettings, updateDeployment
+from litellm.types.scheduler import DefaultPriorities
 from litellm.types.utils import CredentialItem, CustomHuggingfaceTokenizer
 from litellm.types.utils import ModelInfo as ModelMapInfo
 from litellm.types.utils import RawRequestTypedDict, StandardLoggingPayload
@@ -6,17 +6,14 @@ from pydantic import BaseModel

 from litellm import print_verbose
 from litellm.caching.caching import DualCache, RedisCache
+from litellm.constants import DEFAULT_IN_MEMORY_TTL, DEFAULT_POLLING_INTERVAL


 class SchedulerCacheKeys(enum.Enum):
     queue = "scheduler:queue"
-    default_in_memory_ttl = 5  # cache queue in-memory for 5s when redis cache available
-
-
-class DefaultPriorities(enum.Enum):
-    High = 0
-    Medium = 128
-    Low = 255
+    default_in_memory_ttl = (
+        DEFAULT_IN_MEMORY_TTL  # cache queue in-memory for 5s when redis cache available
+    )


 class FlowItem(BaseModel):
@@ -44,7 +41,9 @@ class Scheduler:
         self.cache = DualCache(
             redis_cache=redis_cache, default_in_memory_ttl=default_in_memory_ttl
         )
-        self.polling_interval = polling_interval or 0.03  # default to 3ms
+        self.polling_interval = (
+            polling_interval or DEFAULT_POLLING_INTERVAL
+        )  # default to 3ms

     async def add_request(self, request: FlowItem):
         # We use the priority directly, as lower values indicate higher priority
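A usage sketch of the fallback above, assuming the Scheduler constructor takes optional polling_interval and redis_cache keyword arguments, as the hunk suggests:

from litellm.scheduler import Scheduler

scheduler = Scheduler()                    # no value passed -> DEFAULT_POLLING_INTERVAL (0.03s)
custom = Scheduler(polling_interval=0.01)  # an explicit interval still takes precedence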
litellm/types/scheduler.py (new file, +7 lines)
@@ -0,0 +1,7 @@
+from enum import Enum
+
+
+class DefaultPriorities(Enum):
+    High = 0
+    Medium = 128
+    Low = 255
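A small sketch of how the relocated enum is read; per the add_request comment in the scheduler hunk above, lower values mean higher priority:

from litellm.types.scheduler import DefaultPriorities

priority = DefaultPriorities.High.value  # 0 -> scheduled before Medium (128) and Low (255)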
@@ -60,6 +60,16 @@ import litellm.litellm_core_utils.json_validation_rule
 from litellm.caching._internal_lru_cache import lru_cache_wrapper
 from litellm.caching.caching import DualCache
 from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler
+from litellm.constants import (
+    DEFAULT_MAX_LRU_CACHE_SIZE,
+    DEFAULT_TRIM_RATIO,
+    FUNCTION_DEFINITION_TOKEN_COUNT,
+    INITIAL_RETRY_DELAY,
+    JITTER,
+    MAX_RETRY_DELAY,
+    MINIMUM_PROMPT_CACHE_TOKEN_COUNT,
+    TOOL_CHOICE_OBJECT_TOKEN_COUNT,
+)
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.core_helpers import (
@@ -1519,7 +1529,7 @@ def _select_tokenizer(
     return _select_tokenizer_helper(model=model)


-@lru_cache(maxsize=128)
+@lru_cache(maxsize=DEFAULT_MAX_LRU_CACHE_SIZE)
 def _select_tokenizer_helper(model: str) -> SelectTokenizerResponse:

     if litellm.disable_hf_tokenizer_download is True:
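A self-contained sketch of the memoization pattern used here; resolve_tokenizer is an illustrative stand-in, not litellm's helper:

from functools import lru_cache

DEFAULT_MAX_LRU_CACHE_SIZE = 16  # value from litellm/constants.py in this diff

@lru_cache(maxsize=DEFAULT_MAX_LRU_CACHE_SIZE)
def resolve_tokenizer(model: str) -> str:
    # Stand-in for the real lookup; repeated calls with the same model string
    # are answered from the LRU cache instead of re-resolving the tokenizer.
    return f"tokenizer-for-{model}"

resolve_tokenizer("gpt-4o")  # computed once
resolve_tokenizer("gpt-4o")  # served from the cache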
@@ -1664,7 +1674,7 @@ def openai_token_counter(  # noqa: PLR0915

     if tools:
         num_tokens += len(encoding.encode(_format_function_definitions(tools)))
-        num_tokens += 9  # Additional tokens for function definition of tools
+        num_tokens += FUNCTION_DEFINITION_TOKEN_COUNT  # Additional tokens for function definition of tools
     # If there's a system message and tools are present, subtract four tokens
     if tools and includes_system_message:
         num_tokens -= 4
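A worked example of the bookkeeping above, using FUNCTION_DEFINITION_TOKEN_COUNT = 9 from this diff; the 50-token schema size is illustrative, not from the diff:

encoded_tool_schema_tokens = 50          # illustrative encoding length
num_tokens = encoded_tool_schema_tokens
num_tokens += 9                          # FUNCTION_DEFINITION_TOKEN_COUNT
includes_system_message = True
if includes_system_message:
    num_tokens -= 4                      # system-message overlap correction
assert num_tokens == 55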
@@ -1674,7 +1684,7 @@ def openai_token_counter(  # noqa: PLR0915
         if tool_choice == "none":
             num_tokens += 1
         elif isinstance(tool_choice, dict):
-            num_tokens += 7
+            num_tokens += TOOL_CHOICE_OBJECT_TOKEN_COUNT
             num_tokens += len(encoding.encode(tool_choice["function"]["name"]))

     return num_tokens
@@ -5311,15 +5321,15 @@ def _calculate_retry_after(
     if retry_after is not None and 0 < retry_after <= 60:
         return retry_after

-    initial_retry_delay = 0.5
-    max_retry_delay = 8.0
+    initial_retry_delay = INITIAL_RETRY_DELAY
+    max_retry_delay = MAX_RETRY_DELAY
     nb_retries = max_retries - remaining_retries

     # Apply exponential backoff, but not more than the max.
     sleep_seconds = min(initial_retry_delay * pow(2.0, nb_retries), max_retry_delay)

     # Apply some jitter, plus-or-minus half a second.
-    jitter = 1 - 0.25 * random.random()
+    jitter = JITTER * random.random()
     timeout = sleep_seconds * jitter
     return timeout if timeout >= min_timeout else min_timeout
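A standalone sketch of the retry-delay computation above, with the constant values introduced in this diff (INITIAL_RETRY_DELAY = 0.5, MAX_RETRY_DELAY = 8.0, JITTER = 0.75); retry_delay is an illustrative name:

import random

def retry_delay(nb_retries: int, min_timeout: float = 0.0) -> float:
    # Exponential backoff capped at the maximum: 0.5, 1, 2, 4, 8, 8, ...
    sleep_seconds = min(0.5 * pow(2.0, nb_retries), 8.0)
    # Jitter scales the delay by a random factor in [0, 0.75).
    timeout = sleep_seconds * (0.75 * random.random())
    return timeout if timeout >= min_timeout else min_timeout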
@@ -5645,7 +5655,7 @@ def shorten_message_to_fit_limit(message, tokens_needed, model: Optional[str]):
 def trim_messages(
     messages,
     model: Optional[str] = None,
-    trim_ratio: float = 0.75,
+    trim_ratio: float = DEFAULT_TRIM_RATIO,
     return_response_tokens: bool = False,
     max_tokens=None,
 ):
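A usage sketch for the signature above: with no explicit trim_ratio, messages are trimmed against DEFAULT_TRIM_RATIO (0.75) of the model's token budget. The import path assumes the public litellm.utils entry point; the message content is illustrative:

from litellm.utils import trim_messages

messages = [{"role": "user", "content": "a very long prompt ..."}]
trimmed = trim_messages(messages, model="gpt-3.5-turbo")  # trim_ratio defaults to DEFAULT_TRIM_RATIO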
@@ -6477,7 +6487,7 @@ def is_prompt_caching_valid_prompt(
             model=model,
             use_default_image_token_count=True,
         )
-        return token_count >= 1024
+        return token_count >= MINIMUM_PROMPT_CACHE_TOKEN_COUNT
     except Exception as e:
         verbose_logger.error(f"Error in is_prompt_caching_valid_prompt: {e}")
         return False
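An illustration of the threshold above: a prompt only qualifies for caching once its token count reaches MINIMUM_PROMPT_CACHE_TOKEN_COUNT (1024); the counts below are illustrative:

MINIMUM_PROMPT_CACHE_TOKEN_COUNT = 1024  # value from litellm/constants.py in this diff

print(900 >= MINIMUM_PROMPT_CACHE_TOKEN_COUNT)   # False -> prompt caching not applied
print(1500 >= MINIMUM_PROMPT_CACHE_TOKEN_COUNT)  # True  -> prompt is cacheable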
@@ -3,7 +3,32 @@ import ast
 import os

 # Extremely restrictive set of allowed numbers
-ALLOWED_NUMBERS = {0, 1, -1, 2, 10, 100, 1000}
+ALLOWED_NUMBERS = {
+    0,
+    1,
+    -1,
+    2,
+    10,
+    100,
+    1000,
+    1,
+    4,
+    3,
+    500,
+    408,
+    422,
+    401,
+    404,
+    429,
+    6,
+    409,
+    60,
+    403,
+    400,
+    3600,
+    0.75,
+    503,
+}


 class HardcodedNumberFinder(ast.NodeVisitor):
@@ -47,10 +72,13 @@ def main():
     exit_code = 0
     folder = "../../litellm"
     ignore_file = "constants.py"
+    ignore_folder = "types"
     for root, dirs, files in os.walk(folder):
         for filename in files:
             if filename.endswith(".py") and filename != ignore_file:
                 full_path = os.path.join(root, filename)
+                if ignore_folder in full_path:
+                    continue
                 exit_code |= check_file(full_path)
     sys.exit(exit_code)
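A compact, self-contained sketch of the checking pattern this script relies on (the real HardcodedNumberFinder may differ in details): walk the AST and report numeric literals that are not in the allowed set.

import ast

ALLOWED = {0, 1, -1, 2, 10, 100, 1000}

class NumberFinder(ast.NodeVisitor):
    def __init__(self) -> None:
        self.violations = []

    def visit_Constant(self, node: ast.Constant) -> None:
        # bool is a subclass of int, so exclude it explicitly.
        if isinstance(node.value, (int, float)) and not isinstance(node.value, bool):
            if node.value not in ALLOWED:
                self.violations.append(node.value)
        self.generic_visit(node)

finder = NumberFinder()
finder.visit(ast.parse("timeout = 6000\nretries = 2\n"))
print(finder.violations)  # [6000]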
File diff suppressed because it is too large