mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
Squashed commit of the following: (#9709)
commit b12a9892b7
Author: Krrish Dholakia <krrishdholakia@gmail.com> Date: Wed Apr 2 08:09:56 2025 -0700 fix(utils.py): don't modify openai_token_counter commit 294de31803
Author: Krrish Dholakia <krrishdholakia@gmail.com> Date: Mon Mar 24 21:22:40 2025 -0700 fix: fix linting error commit cb6e9fbe40
Author: Krrish Dholakia <krrishdholakia@gmail.com> Date: Mon Mar 24 19:52:45 2025 -0700 refactor: complete migration commit bfc159172d
Author: Krrish Dholakia <krrishdholakia@gmail.com> Date: Mon Mar 24 19:09:59 2025 -0700 refactor: refactor more constants commit 43ffb6a558
Author: Krrish Dholakia <krrishdholakia@gmail.com> Date: Mon Mar 24 18:45:24 2025 -0700 fix: test commit 04dbe4310c
Author: Krrish Dholakia <krrishdholakia@gmail.com> Date: Mon Mar 24 18:28:58 2025 -0700 refactor: refactor: move more constants into constants.py commit 3c26284aff
Author: Krrish Dholakia <krrishdholakia@gmail.com> Date: Mon Mar 24 18:14:46 2025 -0700 refactor: migrate hardcoded constants out of __init__.py commit c11e0de69d
Author: Krrish Dholakia <krrishdholakia@gmail.com> Date: Mon Mar 24 18:11:21 2025 -0700 build: migrate all constants into constants.py commit 7882bdc787
Author: Krrish Dholakia <krrishdholakia@gmail.com> Date: Mon Mar 24 18:07:37 2025 -0700 build: initial test banning hardcoded numbers in repo
This commit is contained in:
parent
5a722ef18f
commit
8ee32291e0
51 changed files with 509 additions and 118 deletions
|
@ -9,6 +9,10 @@ from pydantic import BaseModel
|
|||
import litellm
|
||||
import litellm._logging
|
||||
from litellm import verbose_logger
|
||||
from litellm.constants import (
|
||||
DEFAULT_MAX_LRU_CACHE_SIZE,
|
||||
DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND,
|
||||
)
|
||||
from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
|
||||
StandardBuiltInToolCostTracking,
|
||||
)
|
||||
|
@ -355,9 +359,7 @@ def cost_per_token( # noqa: PLR0915
|
|||
def get_replicate_completion_pricing(completion_response: dict, total_time=0.0):
|
||||
# see https://replicate.com/pricing
|
||||
# for all litellm currently supported LLMs, almost all requests go to a100_80gb
|
||||
a100_80gb_price_per_second_public = (
|
||||
0.001400 # assume all calls sent to A100 80GB for now
|
||||
)
|
||||
a100_80gb_price_per_second_public = DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND # assume all calls sent to A100 80GB for now
|
||||
if total_time == 0.0: # total time is in ms
|
||||
start_time = completion_response.get("created", time.time())
|
||||
end_time = getattr(completion_response, "ended", time.time())
|
||||
|
@ -450,7 +452,7 @@ def _select_model_name_for_cost_calc(
|
|||
return return_model
|
||||
|
||||
|
||||
@lru_cache(maxsize=16)
|
||||
@lru_cache(maxsize=DEFAULT_MAX_LRU_CACHE_SIZE)
|
||||
def _model_contains_known_llm_provider(model: str) -> bool:
|
||||
"""
|
||||
Check if the model contains a known llm provider
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue