Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 02:34:29 +00:00
Squashed commit of the following: (#9709)
commit b12a9892b7 (Krrish Dholakia <krrishdholakia@gmail.com>, Wed Apr 2 08:09:56 2025 -0700)
    fix(utils.py): don't modify openai_token_counter
commit 294de31803 (Krrish Dholakia <krrishdholakia@gmail.com>, Mon Mar 24 21:22:40 2025 -0700)
    fix: fix linting error
commit cb6e9fbe40 (Krrish Dholakia <krrishdholakia@gmail.com>, Mon Mar 24 19:52:45 2025 -0700)
    refactor: complete migration
commit bfc159172d (Krrish Dholakia <krrishdholakia@gmail.com>, Mon Mar 24 19:09:59 2025 -0700)
    refactor: refactor more constants
commit 43ffb6a558 (Krrish Dholakia <krrishdholakia@gmail.com>, Mon Mar 24 18:45:24 2025 -0700)
    fix: test
commit 04dbe4310c (Krrish Dholakia <krrishdholakia@gmail.com>, Mon Mar 24 18:28:58 2025 -0700)
    refactor: refactor: move more constants into constants.py
commit 3c26284aff (Krrish Dholakia <krrishdholakia@gmail.com>, Mon Mar 24 18:14:46 2025 -0700)
    refactor: migrate hardcoded constants out of __init__.py
commit c11e0de69d (Krrish Dholakia <krrishdholakia@gmail.com>, Mon Mar 24 18:11:21 2025 -0700)
    build: migrate all constants into constants.py
commit 7882bdc787 (Krrish Dholakia <krrishdholakia@gmail.com>, Mon Mar 24 18:07:37 2025 -0700)
    build: initial test banning hardcoded numbers in repo
parent 5a722ef18f
commit 8ee32291e0
51 changed files with 509 additions and 118 deletions
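The last commit in the log above ("build: initial test banning hardcoded numbers in repo") explains the diff that follows: numeric literals are being pulled into constants.py so the rest of the codebase can be linted for stray magic numbers. As a rough illustration only (not the repository's actual test), such a check could walk the AST of every Python file and flag numeric literals defined outside constants.py; the allow-list and entry point below are assumptions.

```python
# Hypothetical sketch of a "no hardcoded numbers" check; not litellm's actual test.
import ast
from pathlib import Path

ALLOWED_VALUES = {0, 1, -1}           # assumption: trivial literals are tolerated
CONSTANTS_FILENAME = "constants.py"   # numeric literals are expected to live here

def find_magic_numbers(repo_root: str) -> list[str]:
    violations = []
    for path in Path(repo_root).rglob("*.py"):
        if path.name == CONSTANTS_FILENAME:
            continue
        tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path))
        for node in ast.walk(tree):
            if (
                isinstance(node, ast.Constant)
                and isinstance(node.value, (int, float))
                and not isinstance(node.value, bool)
                and node.value not in ALLOWED_VALUES
            ):
                violations.append(f"{path}:{node.lineno}: hardcoded number {node.value}")
    return violations

if __name__ == "__main__":
    problems = find_magic_numbers("litellm")
    assert not problems, "\n".join(problems)
```

The hunks below show constants.py (one of the 51 changed files) after the migration.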
@@ -9,6 +9,7 @@ DEFAULT_FAILURE_THRESHOLD_PERCENT = (
    0.5 # default cooldown a deployment if 50% of requests fail in a given minute
)
DEFAULT_MAX_TOKENS = 4096
DEFAULT_ALLOWED_FAILS = 3
DEFAULT_REDIS_SYNC_INTERVAL = 1
DEFAULT_COOLDOWN_TIME_SECONDS = 5
DEFAULT_REPLICATE_POLLING_RETRIES = 5
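The failure-threshold and cooldown constants above describe when the router stops sending traffic to a deployment. A minimal sketch of that decision is below, assuming a per-minute failure counter; the helper name and the exact combination of conditions are illustrative, not litellm's router internals.

```python
# Illustrative sketch of deployment cooldown logic; not litellm's router code.
DEFAULT_FAILURE_THRESHOLD_PERCENT = 0.5
DEFAULT_ALLOWED_FAILS = 3
DEFAULT_COOLDOWN_TIME_SECONDS = 5

def should_cooldown(fails_this_minute: int, requests_this_minute: int) -> bool:
    """Cool a deployment down once it exceeds the allowed fails and at least
    50% of its requests in the current minute have failed."""
    if requests_this_minute == 0:
        return False
    failure_rate = fails_this_minute / requests_this_minute
    return (
        fails_this_minute > DEFAULT_ALLOWED_FAILS
        and failure_rate >= DEFAULT_FAILURE_THRESHOLD_PERCENT
    )

# Example: 4 failures out of 6 requests -> pause the deployment for
# DEFAULT_COOLDOWN_TIME_SECONDS before routing to it again.
assert should_cooldown(fails_this_minute=4, requests_this_minute=6) is True
```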
@@ -16,16 +17,71 @@ DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1
DEFAULT_IMAGE_TOKEN_COUNT = 250
DEFAULT_IMAGE_WIDTH = 300
DEFAULT_IMAGE_HEIGHT = 300
DEFAULT_MAX_TOKENS = 256 # used when providers need a default
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer"
MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100
MINIMUM_PROMPT_CACHE_TOKEN_COUNT = (
    1024 # minimum number of tokens to cache a prompt by Anthropic
)
DEFAULT_TRIM_RATIO = 0.75 # default ratio of tokens to trim from the end of a prompt
HOURS_IN_A_DAY = 24
DAYS_IN_A_WEEK = 7
DAYS_IN_A_MONTH = 28
DAYS_IN_A_YEAR = 365
REPLICATE_MODEL_NAME_WITH_ID_LENGTH = 64
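REDIS_UPDATE_BUFFER_KEY and MAX_REDIS_BUFFER_DEQUEUE_COUNT suggest that spend updates are buffered in a Redis list and drained in bounded batches rather than written to the database one by one. A sketch of that pattern with redis-py follows; the payload shape and function names are assumptions, not the proxy's actual schema.

```python
# Sketch of buffering spend updates in Redis and draining them in batches.
# The key names come from constants.py; everything else is illustrative.
import json
import redis

REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100

r = redis.Redis(host="localhost", port=6379)

def buffer_spend_update(key_hash: str, spend: float) -> None:
    # Producers push small JSON payloads instead of hitting the DB directly.
    r.rpush(REDIS_UPDATE_BUFFER_KEY, json.dumps({"key": key_hash, "spend": spend}))

def drain_spend_updates() -> list[dict]:
    # A background job pops at most MAX_REDIS_BUFFER_DEQUEUE_COUNT items per run,
    # keeping each DB write batch bounded.
    batch = []
    for _ in range(MAX_REDIS_BUFFER_DEQUEUE_COUNT):
        raw = r.lpop(REDIS_UPDATE_BUFFER_KEY)
        if raw is None:
            break
        batch.append(json.loads(raw))
    return batch
```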
#### TOKEN COUNTING ####
FUNCTION_DEFINITION_TOKEN_COUNT = 9
SYSTEM_MESSAGE_TOKEN_COUNT = 4
TOOL_CHOICE_OBJECT_TOKEN_COUNT = 4
DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT = 10
DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT = 20
MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES = 768
MAX_LONG_SIDE_FOR_IMAGE_HIGH_RES = 2000
MAX_TILE_WIDTH = 512
MAX_TILE_HEIGHT = 512
OPENAI_FILE_SEARCH_COST_PER_1K_CALLS = 2.5 / 1000
MIN_NON_ZERO_TEMPERATURE = 0.0001
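MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES, MAX_LONG_SIDE_FOR_IMAGE_HIGH_RES, and the tile dimensions fit the OpenAI-style high-detail image costing scheme: scale the image into the allowed bounds, split it into 512 px tiles, and charge per tile. The sketch below follows OpenAI's published formula (85 base tokens plus 170 per tile); those two per-tile numbers are assumptions taken from OpenAI's documentation, not from this diff.

```python
# Sketch of high-detail image token estimation using the constants above.
# The 85-token base and 170-token-per-tile costs are assumed from OpenAI docs.
import math

MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES = 768
MAX_LONG_SIDE_FOR_IMAGE_HIGH_RES = 2000
MAX_TILE_WIDTH = 512
MAX_TILE_HEIGHT = 512

def estimate_high_res_image_tokens(width: int, height: int) -> int:
    # First fit the long side, then the short side, into the allowed bounds.
    scale = min(1.0, MAX_LONG_SIDE_FOR_IMAGE_HIGH_RES / max(width, height))
    width, height = width * scale, height * scale
    scale = min(1.0, MAX_SHORT_SIDE_FOR_IMAGE_HIGH_RES / min(width, height))
    width, height = width * scale, height * scale
    # Count 512x512 tiles needed to cover the scaled image.
    tiles = math.ceil(width / MAX_TILE_WIDTH) * math.ceil(height / MAX_TILE_HEIGHT)
    return 85 + 170 * tiles  # base tokens + per-tile tokens

# Example: a 1024x1024 image scales to 768x768 -> 4 tiles -> 85 + 170*4 = 765 tokens.
print(estimate_high_res_image_tokens(1024, 1024))
```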
#### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
DEFAULT_MAX_LRU_CACHE_SIZE = 16
INITIAL_RETRY_DELAY = 0.5
MAX_RETRY_DELAY = 8.0
JITTER = 0.75
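INITIAL_RETRY_DELAY, MAX_RETRY_DELAY, and JITTER are the usual ingredients of capped exponential backoff. One common way to combine them is sketched below; the exact jitter formula litellm uses is not visible from the constants alone and is assumed here.

```python
# Sketch of capped exponential backoff with jitter; the jitter formula is an
# assumption, not read from litellm's retry code.
import random

INITIAL_RETRY_DELAY = 0.5
MAX_RETRY_DELAY = 8.0
JITTER = 0.75

def retry_delay(attempt: int) -> float:
    """attempt=0 -> ~0.5s, attempt=1 -> ~1s, ... capped at 8s, plus random jitter."""
    base = min(INITIAL_RETRY_DELAY * (2 ** attempt), MAX_RETRY_DELAY)
    return base + random.random() * JITTER

for attempt in range(5):
    print(f"attempt {attempt}: sleep ~{retry_delay(attempt):.2f}s")
```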
DEFAULT_IN_MEMORY_TTL = 5 # default time to live for the in-memory cache
DEFAULT_POLLING_INTERVAL = 0.03 # default polling interval for the scheduler
AZURE_OPERATION_POLLING_TIMEOUT = 120
REDIS_SOCKET_TIMEOUT = 0.1
REDIS_CONNECTION_POOL_TIMEOUT = 5
NON_LLM_CONNECTION_TIMEOUT = 15 # timeout for adjacent services (e.g. jwt auth)
MAX_EXCEPTION_MESSAGE_LENGTH = 2000
BEDROCK_MAX_POLICY_SIZE = 75
REPLICATE_POLLING_DELAY_SECONDS = 0.5
DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS = 4096
TOGETHER_AI_4_B = 4
TOGETHER_AI_8_B = 8
TOGETHER_AI_21_B = 21
TOGETHER_AI_41_B = 41
TOGETHER_AI_80_B = 80
TOGETHER_AI_110_B = 110
TOGETHER_AI_EMBEDDING_150_M = 150
TOGETHER_AI_EMBEDDING_350_M = 350
QDRANT_SCALAR_QUANTILE = 0.99
QDRANT_VECTOR_SIZE = 1536
CACHED_STREAMING_CHUNK_DELAY = 0.02
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 512
DEFAULT_MAX_TOKENS_FOR_TRITON = 2000
#### Networking settings ####
request_timeout: float = 6000 # time in seconds
STREAM_SSE_DONE_STRING: str = "[DONE]"
### SPEND TRACKING ###
DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND = 0.001400 # price per second for a100 80GB
FIREWORKS_AI_56_B_MOE = 56
FIREWORKS_AI_176_B_MOE = 176
FIREWORKS_AI_16_B = 16
FIREWORKS_AI_80_B = 80

LITELLM_CHAT_PROVIDERS = [
    "openai",
@@ -426,6 +482,9 @@ MCP_TOOL_NAME_PREFIX = "mcp_tool"
MAX_SPENDLOG_ROWS_TO_QUERY = (
    1_000_000 # if spendLogs has more than 1M rows, do not query the DB
)
DEFAULT_SOFT_BUDGET = (
    50.0 # by default all litellm proxy keys have a soft budget of 50.0
)
# makes it clear this is a rate limit error for a litellm virtual key
RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY = "LiteLLM Virtual Key user_api_key_hash"

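DEFAULT_SOFT_BUDGET sets a per-key spend level that triggers an alert rather than a hard block. A small sketch of that distinction is below; the function and return values are placeholders, not the proxy's actual budget code.

```python
# Sketch of soft- vs. hard-budget handling for a virtual key; illustrative only.
from typing import Optional

DEFAULT_SOFT_BUDGET = 50.0

def check_key_budget(
    spend: float,
    soft_budget: float = DEFAULT_SOFT_BUDGET,
    max_budget: Optional[float] = None,
) -> str:
    if max_budget is not None and spend >= max_budget:
        return "blocked"  # hard budget: reject further requests
    if spend >= soft_budget:
        return "alert"    # soft budget: keep serving, notify admins
    return "ok"

print(check_key_budget(spend=55.0))                     # "alert"
print(check_key_budget(spend=120.0, max_budget=100.0))  # "blocked"
```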
@@ -451,3 +510,14 @@ LITELLM_PROXY_ADMIN_NAME = "default_user_id"
########################### DB CRON JOB NAMES ###########################
DB_SPEND_UPDATE_JOB_NAME = "db_spend_update_job"
DEFAULT_CRON_JOB_LOCK_TTL_SECONDS = 60 # 1 minute
PROXY_BUDGET_RESCHEDULER_MIN_TIME = 597
PROXY_BUDGET_RESCHEDULER_MAX_TIME = 605
PROXY_BATCH_WRITE_AT = 10 # in seconds
DEFAULT_HEALTH_CHECK_INTERVAL = 300 # 5 minutes
PROMETHEUS_FALLBACK_STATS_SEND_TIME_HOURS = 9
DEFAULT_MODEL_CREATED_AT_TIME = 1677610602 # returns on `/models` endpoint
DEFAULT_SLACK_ALERTING_THRESHOLD = 300
MAX_TEAM_LIST_LIMIT = 20
DEFAULT_PROMPT_INJECTION_SIMILARITY_THRESHOLD = 0.7
LENGTH_OF_LITELLM_GENERATED_KEY = 16
SECRET_MANAGER_REFRESH_INTERVAL = 86400
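PROXY_BUDGET_RESCHEDULER_MIN_TIME and PROXY_BUDGET_RESCHEDULER_MAX_TIME bracket a roughly ten-minute interval (597 to 605 seconds), presumably so that multiple proxy instances do not all fire the budget reset job at the same instant, while DEFAULT_CRON_JOB_LOCK_TTL_SECONDS bounds how long a cron lock can be held. A sketch of a jittered interval plus a Redis lock is below; the lock key format and helper names are illustrative, not litellm's actual scheduler.

```python
# Sketch of a jittered schedule plus a Redis lock so only one proxy instance
# runs a cron job at a time; illustrative, not litellm's scheduler code.
import random
import redis

PROXY_BUDGET_RESCHEDULER_MIN_TIME = 597
PROXY_BUDGET_RESCHEDULER_MAX_TIME = 605
DEFAULT_CRON_JOB_LOCK_TTL_SECONDS = 60
DB_SPEND_UPDATE_JOB_NAME = "db_spend_update_job"

r = redis.Redis(host="localhost", port=6379)

def next_run_in_seconds() -> int:
    # Each instance sleeps a slightly different amount, spreading the load.
    return random.randint(PROXY_BUDGET_RESCHEDULER_MIN_TIME, PROXY_BUDGET_RESCHEDULER_MAX_TIME)

def try_acquire_cron_lock(job_name: str) -> bool:
    # SET NX EX: only one instance gets the lock, and it expires automatically
    # so a crashed worker cannot hold it forever.
    return bool(
        r.set(f"cron_lock:{job_name}", "1", nx=True, ex=DEFAULT_CRON_JOB_LOCK_TTL_SECONDS)
    )
```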
|
Loading…
Add table
Add a link
Reference in a new issue