diff --git a/litellm/constants.py b/litellm/constants.py
index da66f897c9..d6b5d1e08a 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -18,9 +18,13 @@ MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024  # 1MB = 1024KB
 SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
 #### RELIABILITY ####
 REPEATED_STREAMING_CHUNK_LIMIT = 100  # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
+DEFAULT_MAX_LRU_CACHE_SIZE = 16
 #### Networking settings ####
 request_timeout: float = 6000  # time in seconds
 STREAM_SSE_DONE_STRING: str = "[DONE]"
+#### SPEND TRACKING ####
+DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND = 0.001400  # public per-second price for an A100 80GB GPU on Replicate (see https://replicate.com/pricing)
+
 
 LITELLM_CHAT_PROVIDERS = [
     "openai",
diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 55736772af..8956e0f40e 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -9,6 +9,10 @@ from pydantic import BaseModel
 import litellm
 import litellm._logging
 from litellm import verbose_logger
+from litellm.constants import (
+    DEFAULT_MAX_LRU_CACHE_SIZE,
+    DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND,
+)
 from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
     StandardBuiltInToolCostTracking,
 )
@@ -357,9 +361,7 @@ def cost_per_token(  # noqa: PLR0915
 def get_replicate_completion_pricing(completion_response: dict, total_time=0.0):
     # see https://replicate.com/pricing
     # for all litellm currently supported LLMs, almost all requests go to a100_80gb
-    a100_80gb_price_per_second_public = (
-        0.001400  # assume all calls sent to A100 80GB for now
-    )
+    a100_80gb_price_per_second_public = DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND  # assume all calls sent to A100 80GB for now
     if total_time == 0.0:  # total time is in ms
         start_time = completion_response.get("created", time.time())
         end_time = getattr(completion_response, "ended", time.time())
@@ -452,7 +454,7 @@ def _select_model_name_for_cost_calc(
     return return_model
 
 
-@lru_cache(maxsize=16)
+@lru_cache(maxsize=DEFAULT_MAX_LRU_CACHE_SIZE)
 def _model_contains_known_llm_provider(model: str) -> bool:
     """
     Check if the model contains a known llm provider
diff --git a/tests/code_coverage_tests/ban_constant_numbers.py b/tests/code_coverage_tests/ban_constant_numbers.py
index 40e7139ae9..ea31a8b051 100644
--- a/tests/code_coverage_tests/ban_constant_numbers.py
+++ b/tests/code_coverage_tests/ban_constant_numbers.py
@@ -3,7 +3,7 @@ import ast
 import os
 
 # Extremely restrictive set of allowed numbers
-ALLOWED_NUMBERS = {0, 1, -1, 2, 10, 100}
+ALLOWED_NUMBERS = {0, 1, -1, 2, 10, 100, 1000}
 
 
 class HardcodedNumberFinder(ast.NodeVisitor):