Merge pull request #5573 from BerriAI/litellm_add_rerank_spend_tracking
[Feat] Add cost tracking for cohere rerank
Commit 64a828b455
6 changed files with 98 additions and 1 deletion
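In brief: this PR routes rerank calls through litellm's existing spend-tracking path, so cost callbacks fire for Cohere rerank just as they do for completions. A minimal usage sketch of what this enables (that the sync entry point mirrors the async call in the test below is an assumption):

    import litellm

    response = litellm.rerank(
        model="cohere/rerank-english-v3.0",
        query="hello",
        documents=["hello", "world"],
        top_n=3,
    )
    # With this PR, the @client wrapper computes a response cost for the
    # RerankResponse and hands it to any registered logging callbacks.
    print(response.results)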
@@ -22,6 +22,7 @@ from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_cha
 from litellm.llms.anthropic.cost_calculation import (
     cost_per_token as anthropic_cost_per_token,
 )
+from litellm.rerank_api.types import RerankResponse
 from litellm.types.llms.openai import HttpxBinaryResponseContent
 from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
 from litellm.types.utils import PassthroughCallTypes, Usage
@@ -93,6 +94,8 @@ def cost_per_token(
         "transcription",
         "aspeech",
         "speech",
+        "rerank",
+        "arerank",
     ] = "completion",
 ) -> Tuple[float, float]:
     """
@@ -487,6 +490,8 @@ def completion_cost(
         "transcription",
         "aspeech",
         "speech",
+        "rerank",
+        "arerank",
     ] = "completion",
     ### REGION ###
     custom_llm_provider=None,
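Both call_type parameters above are typing.Literal unions, so a type-checker only accepts the new strings once they are added to the union. A tiny illustrative sketch (not litellm's full union):

    from typing import Literal

    CallType = Literal["completion", "transcription", "aspeech", "speech", "rerank", "arerank"]

    def track_spend(call_type: CallType) -> None:
        print(f"tracking spend for call_type={call_type}")

    track_spend("arerank")  # only type-checks with the new members present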
@@ -747,6 +752,7 @@ def response_cost_calculator(
         TranscriptionResponse,
         TextCompletionResponse,
         HttpxBinaryResponseContent,
+        RerankResponse,
     ],
     model: str,
     custom_llm_provider: Optional[str],
@@ -765,6 +771,8 @@ def response_cost_calculator(
         "transcription",
         "aspeech",
         "speech",
+        "rerank",
+        "arerank",
     ],
     optional_params: dict,
     cache_hit: Optional[bool] = None,
@@ -789,6 +797,15 @@ def response_cost_calculator(
                 call_type=call_type,
                 custom_llm_provider=custom_llm_provider,
             )
+        elif isinstance(response_object, RerankResponse) and (
+            call_type == "arerank" or call_type == "rerank"
+        ):
+            response_cost = rerank_cost(
+                rerank_response=response_object,
+                model=model,
+                call_type=call_type,
+                custom_llm_provider=custom_llm_provider,
+            )
         else:
             if custom_pricing is True:  # override defaults if custom pricing is set
                 base_model = model
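The new elif follows the calculator's existing pattern: dispatch on the response type plus call_type before falling back to token-based pricing. A self-contained sketch of that shape, using hypothetical stand-in classes rather than litellm's own:

    from typing import Union

    class RerankResult: ...   # stand-in for RerankResponse
    class ChatResult: ...     # stand-in for token-priced responses

    def response_cost(obj: Union[RerankResult, ChatResult], call_type: str) -> float:
        if isinstance(obj, RerankResult) and call_type in ("rerank", "arerank"):
            return 0.002  # flat per-request price, as in this PR
        return 0.0        # token-based pricing would go here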
@@ -820,3 +837,28 @@ def response_cost_calculator(
             )
         )
         return None
+
+
+def rerank_cost(
+    rerank_response: RerankResponse,
+    model: str,
+    call_type: Literal["rerank", "arerank"],
+    custom_llm_provider: Optional[str],
+) -> float:
+    """
+    Returns
+    - float: cost of the rerank response; raises on an unsupported provider.
+    """
+    _, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
+
+    try:
+        if custom_llm_provider == "cohere":
+            return 0.002
+        raise ValueError(
+            f"invalid custom_llm_provider for rerank model: {model}, custom_llm_provider: {custom_llm_provider}"
+        )
+    except Exception as e:
+        verbose_logger.exception(
+            f"litellm.cost_calculator.py::rerank_cost - Exception occurred - {str(e)}"
+        )
+        raise e
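Note the pricing model: rerank_cost returns a flat per-request charge for Cohere, so spend scales with the number of calls, not the number of documents scored. Back-of-envelope:

    COST_PER_REQUEST = 0.002  # USD, from rerank_cost above

    # 1,000 rerank calls cost the same whether each scores 2 or 200 documents
    print(1_000 * COST_PER_REQUEST)  # 2.0 USD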
@@ -30,6 +30,7 @@ from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_logging,
 )
+from litellm.rerank_api.types import RerankResponse
 from litellm.types.llms.openai import HttpxBinaryResponseContent
 from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
 from litellm.types.utils import (
@@ -525,6 +526,7 @@ class Logging:
             TranscriptionResponse,
             TextCompletionResponse,
             HttpxBinaryResponseContent,
+            RerankResponse,
         ],
         cache_hit: Optional[bool] = None,
     ):
@@ -588,6 +590,7 @@ class Logging:
             or isinstance(result, TranscriptionResponse)
             or isinstance(result, TextCompletionResponse)
             or isinstance(result, HttpxBinaryResponseContent)  # tts
+            or isinstance(result, RerankResponse)
         ):
             ## RESPONSE COST ##
             self.model_call_details["response_cost"] = (
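Once a result passes this isinstance chain, the computed cost lands in model_call_details["response_cost"], which success callbacks later receive in their kwargs (the test below relies on this). A minimal callback sketch, assuming the standard CustomLogger hook:

    from litellm.integrations.custom_logger import CustomLogger

    class CostLogger(CustomLogger):
        async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
            # populated from model_call_details above
            print("response_cost:", kwargs.get("response_cost"))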
@@ -9,7 +9,7 @@ from litellm.llms.cohere.rerank import CohereRerank
 from litellm.llms.togetherai.rerank import TogetherAIRerank
 from litellm.secret_managers.main import get_secret
 from litellm.types.router import *
-from litellm.utils import supports_httpx_timeout
+from litellm.utils import client, supports_httpx_timeout
 
 from .types import RerankRequest, RerankResponse
@@ -20,6 +20,7 @@ together_rerank = TogetherAIRerank()
 #################################################
 
 
+@client
 async def arerank(
     model: str,
     query: str,
@@ -64,6 +65,7 @@ async def arerank(
         raise e
 
 
+@client
 def rerank(
     model: str,
     query: str,
@@ -23,3 +23,16 @@ class RerankResponse(BaseModel):
     id: str
     results: List[dict]  # Contains index and relevance_score
     meta: dict  # Contains api_version and billed_units
+    _hidden_params: dict = {}
+
+    class Config:
+        underscore_attrs_are_private = True
+
+    def __getitem__(self, key):
+        return self.__dict__[key]
+
+    def get(self, key, default=None):
+        return self.__dict__.get(key, default)
+
+    def __contains__(self, key):
+        return key in self.__dict__
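Usage sketch for the accessors just added: RerankResponse now supports dict-style access alongside attribute access (assuming id, results, and meta are the only required fields):

    resp = RerankResponse(
        id="abc123",
        results=[{"index": 0, "relevance_score": 0.9}],
        meta={"api_version": {}, "billed_units": {}},
    )
    assert resp["id"] == resp.id        # __getitem__
    assert "results" in resp            # __contains__
    assert resp.get("missing") is None  # get() with a default

The Config flag is pydantic v1's mechanism for treating _hidden_params as a private attribute rather than a serialized model field.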
@@ -1,3 +1,4 @@
+import asyncio
 import json
 import os
 import sys
@@ -20,6 +21,7 @@ import pytest
 
 import litellm
 from litellm import RateLimitError, Timeout, completion, completion_cost, embedding
+from litellm.integrations.custom_logger import CustomLogger
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 
 
@@ -177,3 +179,37 @@ async def test_rerank_custom_api_base():
     assert response.results is not None
 
     assert_response_shape(response, custom_llm_provider="cohere")
+
+
+class TestLogger(CustomLogger):
+
+    def __init__(self):
+        self.kwargs = None
+        self.response_obj = None
+        super().__init__()
+
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        print("in success event for rerank, kwargs = ", kwargs)
+        print("in success event for rerank, response_obj = ", response_obj)
+        self.kwargs = kwargs
+        self.response_obj = response_obj
+
+
+@pytest.mark.asyncio()
+async def test_rerank_custom_callbacks():
+    custom_logger = TestLogger()
+    litellm.callbacks = [custom_logger]
+    response = await litellm.arerank(
+        model="cohere/rerank-english-v3.0",
+        query="hello",
+        documents=["hello", "world"],
+        top_n=3,
+    )
+
+    await asyncio.sleep(5)
+
+    print("async re rank response: ", response)
+    assert custom_logger.kwargs is not None
+    assert custom_logger.kwargs.get("response_cost") > 0.0
+    assert custom_logger.response_obj is not None
+    assert custom_logger.response_obj.results is not None
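Two details worth noting in this test: success callbacks run asynchronously after the response returns, hence the asyncio.sleep(5) before the assertions, and the cost check is only > 0.0, which the flat $0.002 Cohere price satisfies. To run it in isolation (the file path is an assumption):

    pytest tests/test_rerank.py::test_rerank_custom_callbacks -s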
@@ -745,6 +745,7 @@ def client(original_function):
         or kwargs.get("amoderation", False) == True
         or kwargs.get("atext_completion", False) == True
        or kwargs.get("atranscription", False) == True
+        or kwargs.get("arerank", False) == True
     ):
         # [OPTIONAL] CHECK MAX RETRIES / REQUEST
         if litellm.num_retries_per_request is not None:
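This flag is how the shared client() wrapper recognizes an async rerank invocation: litellm.arerank presumably sets arerank=True in kwargs before delegating, the same convention the wrapper already checks for acompletion and atranscription. A stripped-down sketch of the pattern (illustrative only; litellm's real wrapper also handles logging, retries, and cost calculation):

    import functools

    def client(original_function):
        @functools.wraps(original_function)
        def wrapper(*args, **kwargs):
            is_async_call = (
                kwargs.get("acompletion", False) is True
                or kwargs.get("atranscription", False) is True
                or kwargs.get("arerank", False) is True  # new in this PR
            )
            if is_async_call:
                pass  # async-aware handling (e.g. skip sync-only retry setup)
            return original_function(*args, **kwargs)
        return wrapper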