build: migrate all constants into constants.py

Krrish Dholakia 2025-03-24 18:11:21 -07:00
parent 7882bdc787
commit c11e0de69d
3 changed files with 11 additions and 5 deletions

litellm/constants.py

@@ -18,9 +18,13 @@ MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024  # 1MB = 1024KB
 SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
 
 #### RELIABILITY ####
 REPEATED_STREAMING_CHUNK_LIMIT = 100  # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
+DEFAULT_MAX_LRU_CACHE_SIZE = 16
 #### Networking settings ####
 request_timeout: float = 6000  # time in seconds
 STREAM_SSE_DONE_STRING: str = "[DONE]"
+### SPEND TRACKING ###
+DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND = 0.001400  # price per second for a100 80GB
+
 LITELLM_CHAT_PROVIDERS = [
     "openai",

litellm/cost_calculator.py

@@ -9,6 +9,10 @@ from pydantic import BaseModel
 import litellm
 import litellm._logging
 from litellm import verbose_logger
+from litellm.constants import (
+    DEFAULT_MAX_LRU_CACHE_SIZE,
+    DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND,
+)
 from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
     StandardBuiltInToolCostTracking,
 )
@@ -357,9 +361,7 @@ def cost_per_token(  # noqa: PLR0915
 def get_replicate_completion_pricing(completion_response: dict, total_time=0.0):
     # see https://replicate.com/pricing
     # for all litellm currently supported LLMs, almost all requests go to a100_80gb
-    a100_80gb_price_per_second_public = (
-        0.001400  # assume all calls sent to A100 80GB for now
-    )
+    a100_80gb_price_per_second_public = DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND  # assume all calls sent to A100 80GB for now
     if total_time == 0.0:  # total time is in ms
         start_time = completion_response.get("created", time.time())
         end_time = getattr(completion_response, "ended", time.time())
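
For a sense of the magnitude this constant encodes: Replicate bills by GPU-second, and total_time here arrives in milliseconds. A back-of-the-envelope sketch (estimate_replicate_cost is a hypothetical helper, not LiteLLM's actual code path):

from litellm.constants import DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND

def estimate_replicate_cost(total_time_ms: float) -> float:
    # Hypothetical helper: milliseconds -> dollars at the A100 80GB rate.
    return (total_time_ms / 1000.0) * DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND

# A 30-second generation: 30_000 ms -> 30 s * $0.0014/s = $0.042
print(f"${estimate_replicate_cost(30_000):.4f}")  # prints $0.0420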
@@ -452,7 +454,7 @@ def _select_model_name_for_cost_calc(
     return return_model
 
 
-@lru_cache(maxsize=16)
+@lru_cache(maxsize=DEFAULT_MAX_LRU_CACHE_SIZE)
 def _model_contains_known_llm_provider(model: str) -> bool:
     """
     Check if the model contains a known llm provider
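
A minimal sketch of what the decorator buys here: functools.lru_cache memoizes results for up to DEFAULT_MAX_LRU_CACHE_SIZE distinct arguments, evicting the least recently used entry beyond that. The provider prefixes below are a hypothetical stand-in for litellm's real list:

from functools import lru_cache

DEFAULT_MAX_LRU_CACHE_SIZE = 16  # mirrors the new constant

@lru_cache(maxsize=DEFAULT_MAX_LRU_CACHE_SIZE)
def contains_known_provider(model: str) -> bool:
    # Hypothetical stand-in for the real provider check.
    return model.startswith(("openai/", "anthropic/", "replicate/"))

contains_known_provider("openai/gpt-4o")  # miss: computed and cached
contains_known_provider("openai/gpt-4o")  # hit: served from the cache
print(contains_known_provider.cache_info())
# CacheInfo(hits=1, misses=1, maxsize=16, currsize=1)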

tests/code_coverage_tests/ban_constant_numbers.py

@@ -3,7 +3,7 @@ import ast
 import os
 
 # Extremely restrictive set of allowed numbers
-ALLOWED_NUMBERS = {0, 1, -1, 2, 10, 100}
+ALLOWED_NUMBERS = {0, 1, -1, 2, 10, 100, 1000}
 
 
 class HardcodedNumberFinder(ast.NodeVisitor):
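
The checker this allowlist feeds walks the AST and flags numeric literals outside the set, which is what forces values like 0.001400 into constants.py. A simplified sketch of the approach (the visitor and allowlist names come from the diff; the reporting details are invented):

import ast

ALLOWED_NUMBERS = {0, 1, -1, 2, 10, 100, 1000}

class HardcodedNumberFinder(ast.NodeVisitor):
    # Simplified sketch: record numeric literals missing from the allowlist.
    def __init__(self) -> None:
        self.violations: list[tuple[int, object]] = []

    def visit_Constant(self, node: ast.Constant) -> None:
        is_number = isinstance(node.value, (int, float)) and not isinstance(node.value, bool)
        if is_number and node.value not in ALLOWED_NUMBERS:
            self.violations.append((node.lineno, node.value))
        self.generic_visit(node)

finder = HardcodedNumberFinder()
finder.visit(ast.parse("a100_80gb_price_per_second_public = 0.001400"))
print(finder.violations)  # [(1, 0.0014)]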