mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
build: migrate all constants into constants.py
This commit is contained in:
parent
7882bdc787
commit
c11e0de69d
3 changed files with 11 additions and 5 deletions
|
@ -18,9 +18,13 @@ MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
|
||||||
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
|
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
|
||||||
#### RELIABILITY ####
|
#### RELIABILITY ####
|
||||||
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
|
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
|
||||||
|
DEFAULT_MAX_LRU_CACHE_SIZE = 16
|
||||||
#### Networking settings ####
|
#### Networking settings ####
|
||||||
request_timeout: float = 6000 # time in seconds
|
request_timeout: float = 6000 # time in seconds
|
||||||
STREAM_SSE_DONE_STRING: str = "[DONE]"
|
STREAM_SSE_DONE_STRING: str = "[DONE]"
|
||||||
|
### SPEND TRACKING ###
|
||||||
|
DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND = 0.001400 # price per second for a100 80GB
|
||||||
|
|
||||||
|
|
||||||
LITELLM_CHAT_PROVIDERS = [
|
LITELLM_CHAT_PROVIDERS = [
|
||||||
"openai",
|
"openai",
|
||||||
|
|
|
@ -9,6 +9,10 @@ from pydantic import BaseModel
|
||||||
import litellm
|
import litellm
|
||||||
import litellm._logging
|
import litellm._logging
|
||||||
from litellm import verbose_logger
|
from litellm import verbose_logger
|
||||||
|
from litellm.constants import (
|
||||||
|
DEFAULT_MAX_LRU_CACHE_SIZE,
|
||||||
|
DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND,
|
||||||
|
)
|
||||||
from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
|
from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
|
||||||
StandardBuiltInToolCostTracking,
|
StandardBuiltInToolCostTracking,
|
||||||
)
|
)
|
||||||
|
@ -357,9 +361,7 @@ def cost_per_token( # noqa: PLR0915
|
||||||
def get_replicate_completion_pricing(completion_response: dict, total_time=0.0):
|
def get_replicate_completion_pricing(completion_response: dict, total_time=0.0):
|
||||||
# see https://replicate.com/pricing
|
# see https://replicate.com/pricing
|
||||||
# for all litellm currently supported LLMs, almost all requests go to a100_80gb
|
# for all litellm currently supported LLMs, almost all requests go to a100_80gb
|
||||||
a100_80gb_price_per_second_public = (
|
a100_80gb_price_per_second_public = DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND # assume all calls sent to A100 80GB for now
|
||||||
0.001400 # assume all calls sent to A100 80GB for now
|
|
||||||
)
|
|
||||||
if total_time == 0.0: # total time is in ms
|
if total_time == 0.0: # total time is in ms
|
||||||
start_time = completion_response.get("created", time.time())
|
start_time = completion_response.get("created", time.time())
|
||||||
end_time = getattr(completion_response, "ended", time.time())
|
end_time = getattr(completion_response, "ended", time.time())
|
||||||
|
@ -452,7 +454,7 @@ def _select_model_name_for_cost_calc(
|
||||||
return return_model
|
return return_model
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=16)
|
@lru_cache(maxsize=DEFAULT_MAX_LRU_CACHE_SIZE)
|
||||||
def _model_contains_known_llm_provider(model: str) -> bool:
|
def _model_contains_known_llm_provider(model: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if the model contains a known llm provider
|
Check if the model contains a known llm provider
|
||||||
|
|
|
@ -3,7 +3,7 @@ import ast
|
||||||
import os
|
import os
|
||||||
|
|
||||||
# Extremely restrictive set of allowed numbers
|
# Extremely restrictive set of allowed numbers
|
||||||
ALLOWED_NUMBERS = {0, 1, -1, 2, 10, 100}
|
ALLOWED_NUMBERS = {0, 1, -1, 2, 10, 100, 1000}
|
||||||
|
|
||||||
|
|
||||||
class HardcodedNumberFinder(ast.NodeVisitor):
|
class HardcodedNumberFinder(ast.NodeVisitor):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue