Add cost tracking for rerank via bedrock (#8691)

* feat(bedrock/rerank): infer model region if model given as arn
* test: add unit testing to ensure bedrock region name inferred from arn on rerank
* feat(bedrock/rerank/transformation.py): include search units for bedrock rerank result
  Resolves https://github.com/BerriAI/litellm/issues/7258#issuecomment-2671557137
* test(test_bedrock_completion.py): add testing for bedrock cohere rerank
* feat(cost_calculator.py): refactor rerank cost tracking to support bedrock cost tracking
* build(model_prices_and_context_window.json): add amazon.rerank model to model cost map
* fix(cost_calculator.py): bedrock/common_utils.py get base model from model w/ arn -> handles rerank model
* build(model_prices_and_context_window.json): add bedrock cohere rerank pricing
* feat(bedrock/rerank): migrate bedrock config to basererank config
* Revert "feat(bedrock/rerank): migrate bedrock config to basererank config"
  This reverts commit 84fae1f167.
* test: add testing to ensure large doc / queries are correctly counted
* Revert "test: add testing to ensure large doc / queries are correctly counted"
  This reverts commit 4337f1657e.
* fix(migrate-jina-ai-to-rerank-config): enables cost tracking
* refactor(jina_ai/): finish migrating jina ai to base rerank config
  enables cost tracking
* fix(jina_ai/rerank): e2e jina ai rerank cost tracking
* fix: cleanup dead code
* fix: fix python3.8 compatibility error
* test: fix test
* test: add e2e testing for azure ai rerank
* fix: fix linting error
* test: mark cohere as flaky
2025-04-27 11:43:54 +00:00 · 2025-02-20 21:00:18 -08:00 · 2025-02-20 21:00:18 -08:00 · 30a4f2abc2
commit 30a4f2abc2
parent e5f7bde268
26 changed files with 524 additions and 296 deletions
--- a/litellm/llms/base_llm/rerank/transformation.py
+++ b/litellm/llms/base_llm/rerank/transformation.py
@ -1,9 +1,10 @@
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

 import httpx

-from litellm.types.rerank import OptionalRerankParams, RerankResponse
+from litellm.types.rerank import OptionalRerankParams, RerankBilledUnits, RerankResponse
+from litellm.types.utils import ModelInfo

 from ..chat.transformation import BaseLLMException

@ -66,7 +67,7 @@ class BaseRerankConfig(ABC):
    @abstractmethod
    def map_cohere_rerank_params(
        self,
-        non_default_params: Optional[dict],
+        non_default_params: dict,
        model: str,
        drop_params: bool,
        query: str,
@ -79,8 +80,48 @@ class BaseRerankConfig(ABC):
    ) -> OptionalRerankParams:
        pass

-    @abstractmethod
    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
-        pass
+        raise BaseLLMException(
+            status_code=status_code,
+            message=error_message,
+            headers=headers,
+        )
+
+    def calculate_rerank_cost(
+        self,
+        model: str,
+        custom_llm_provider: Optional[str] = None,
+        billed_units: Optional[RerankBilledUnits] = None,
+        model_info: Optional[ModelInfo] = None,
+    ) -> Tuple[float, float]:
+        """
+        Calculates the cost of a rerank request: the model's input_cost_per_query multiplied by the billed search units.
+
+        Input:
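+            - model: str, the model name without provider prefix (not read by this base implementation)
+            - custom_llm_provider: str, the provider used for the model. Intended for checking that the litellm model info is for that provider (not read by this base implementation).
+            - billed_units: RerankBilledUnits, the units billed for the request; only its "search_units" entry is used. If it is None or has no "search_units", the cost is 0.0.
+            - model_info: ModelInfo, the model info for the given model; supplies "input_cost_per_query". If it is None or that price is missing/None, the cost is 0.0.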
+
+        Returns:
+            Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+        """
+
+        if (
+            model_info is None
+            or "input_cost_per_query" not in model_info
+            or model_info["input_cost_per_query"] is None
+            or billed_units is None
+        ):
+            return 0.0, 0.0
+
+        search_units = billed_units.get("search_units")
+
+        if search_units is None:
+            return 0.0, 0.0
+
+        prompt_cost = model_info["input_cost_per_query"] * search_units
+
+        return prompt_cost, 0.0