fix(utils.py): allow passing in custom pricing to completion_cost as params
parent a0c5c402ae
commit a2a5884df1
3 changed files with 94 additions and 4 deletions
@@ -65,6 +65,42 @@ async def test_custom_pricing(sync_mode):
    assert new_handler.response_cost == 0


def test_custom_pricing_as_completion_cost_param():
    from litellm import ModelResponse, Choices, Message
    from litellm.utils import Usage

    resp = ModelResponse(
        id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
        choices=[
            Choices(
                finish_reason=None,
                index=0,
                message=Message(
                    content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                    role="assistant",
                ),
            )
        ],
        created=1700775391,
        model="ft:gpt-3.5-turbo:my-org:custom_suffix:id",
        object="chat.completion",
        system_fingerprint=None,
        usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38),
    )

    cost = litellm.completion_cost(
        completion_response=resp,
        custom_cost_per_token={
            "input_cost_per_token": 1000,
            "output_cost_per_token": 20,
        },
    )

    expected_cost = 1000 * 21 + 17 * 20

    assert round(cost, 5) == round(expected_cost, 5)


def test_get_gpt3_tokens():
    max_tokens = get_max_tokens("gpt-3.5-turbo")
    print(max_tokens)
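For reference, the asserted figure works out to 1000 * 21 + 17 * 20 = 21,000 + 340 = 21,340 USD. The rates appear deliberately oversized so the assertion would fail loudly if the default model-cost map were consulted instead of the custom pricing path.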
litellm/types/utils.py (new file, 6 lines)
@@ -0,0 +1,6 @@
from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict


class CostPerToken(TypedDict):
    input_cost_per_token: float
    output_cost_per_token: float
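Since CostPerToken is a plain TypedDict, callers can build it as an ordinary dict literal and still get static type checking. A minimal sketch (the variable name and rates are illustrative, not real prices):

from litellm.types.utils import CostPerToken

# Illustrative rates: $1 per 1M input tokens, $2 per 1M output tokens.
my_rates: CostPerToken = {
    "input_cost_per_token": 1e-6,
    "output_cost_per_token": 2e-6,
}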
litellm/utils.py
@@ -35,6 +35,7 @@ from dataclasses import (
import litellm._service_logger  # for storing API inputs, outputs, and metadata
from litellm.llms.custom_httpx.http_handler import HTTPHandler
from litellm.caching import DualCache
from litellm.types.utils import CostPerToken

oidc_cache = DualCache()
@@ -4222,6 +4223,29 @@ def token_counter(
    return num_tokens


def _cost_per_token_custom_pricing_helper(
    prompt_tokens=0,
    completion_tokens=0,
    response_time_ms=None,
    ### CUSTOM PRICING ###
    custom_cost_per_token: Optional[CostPerToken] = None,
    custom_cost_per_second: Optional[float] = None,
) -> Optional[Tuple[float, float]]:
    """Internal helper function for calculating cost, if custom pricing given"""
    if custom_cost_per_token is None and custom_cost_per_second is None:
        return None

    if custom_cost_per_token is not None:
        input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens
        output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens
        return input_cost, output_cost
    elif custom_cost_per_second is not None:
        output_cost = custom_cost_per_second * response_time_ms / 1000  # type: ignore
        return 0, output_cost

    return None


def cost_per_token(
    model="",
    prompt_tokens=0,
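The helper's contract, sketched with illustrative numbers (it is private, so calling it directly is for demonstration only; in normal use it is reached through cost_per_token):

from litellm.utils import _cost_per_token_custom_pricing_helper

# Token-based pricing takes precedence when both kinds are given: 1.0 * 10, 2.0 * 5.
print(_cost_per_token_custom_pricing_helper(
    prompt_tokens=10,
    completion_tokens=5,
    custom_cost_per_token={"input_cost_per_token": 1.0, "output_cost_per_token": 2.0},
))  # (10.0, 10.0)

# Per-second pricing bills the whole call as output cost: 0.5 * 2000 / 1000.
print(_cost_per_token_custom_pricing_helper(
    response_time_ms=2000,
    custom_cost_per_second=0.5,
))  # (0, 1.0)

# No custom pricing given -> None, so the caller falls through to the model-cost map.
print(_cost_per_token_custom_pricing_helper(prompt_tokens=10))  # None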
@@ -4229,7 +4253,10 @@ def cost_per_token(
    response_time_ms=None,
    custom_llm_provider=None,
    region_name=None,
    ### CUSTOM PRICING ###
    custom_cost_per_token: Optional[CostPerToken] = None,
    custom_cost_per_second: Optional[float] = None,
) -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.

@@ -4237,13 +4264,28 @@ def cost_per_token(
        model (str): The name of the model to use. Default is ""
        prompt_tokens (int): The number of tokens in the prompt.
        completion_tokens (int): The number of tokens in the completion.
        response_time (float): The amount of time, in milliseconds, it took the call to complete.
        custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
        custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
        custom_cost_per_second: Optional[float]: the cost per second for the llm api call.

    Returns:
        tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
    """
    ## CUSTOM PRICING ##
    response_cost = _cost_per_token_custom_pricing_helper(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        response_time_ms=response_time_ms,
        custom_cost_per_second=custom_cost_per_second,
        custom_cost_per_token=custom_cost_per_token,
    )
    if response_cost is not None:
        return response_cost[0], response_cost[1]

    # given
    prompt_tokens_cost_usd_dollar: float = 0
    completion_tokens_cost_usd_dollar: float = 0
    model_cost_ref = litellm.model_cost
    model_with_provider = model
    if custom_llm_provider is not None:
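From the caller's side, custom pricing now short-circuits the model-cost lookup entirely, so the model name never has to exist in litellm.model_cost. A sketch with made-up rates (chosen as binary-exact floats so the printed values are clean):

from litellm.utils import cost_per_token

prompt_usd, completion_usd = cost_per_token(
    model="my-fine-tuned-model",  # never looked up when custom pricing is supplied
    prompt_tokens=100,
    completion_tokens=50,
    custom_cost_per_token={"input_cost_per_token": 0.25, "output_cost_per_token": 0.5},
)
print(prompt_usd, completion_usd)  # 25.0 25.0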
@@ -4406,6 +4448,9 @@ def completion_cost(
    size=None,
    quality=None,
    n=None,  # number of images
    ### CUSTOM PRICING ###
    custom_cost_per_token: Optional[CostPerToken] = None,
    custom_cost_per_second: Optional[float] = None,
) -> float:
    """
    Calculate the cost of a given completion call for GPT-3.5-turbo, llama2, or any litellm-supported llm.

@@ -4418,6 +4463,8 @@ def completion_cost(
        prompt (str): Optional. The input prompt passed to the llm
        completion (str): Optional. The output completion text from the llm
        total_time (float): Optional. (Only used for Replicate LLMs) The total time used for the request in seconds
        custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
        custom_cost_per_second: Optional[float]: the cost per second for the llm api call.

    Returns:
        float: The cost in USD dollars for the completion based on the provided parameters.

@@ -4427,7 +4474,6 @@ def completion_cost(
    Note:
        - For custom pricing, see this - https://docs.litellm.ai/docs/proxy/custom_pricing
        - If completion_response is provided, the function extracts token information and the model name from it.
        - If completion_response is not provided, the function calculates token counts based on the model and input text.
        - The cost is calculated based on the model, prompt tokens, and completion tokens.

@@ -4562,6 +4608,8 @@ def completion_cost(
            custom_llm_provider=custom_llm_provider,
            response_time_ms=total_time,
            region_name=region_name,
            custom_cost_per_second=custom_cost_per_second,
            custom_cost_per_token=custom_cost_per_token,
        )
        _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
        print_verbose(
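End to end, completion_cost can now price a call without a response object: per its docstring it derives token counts from the prompt and completion strings for the given model, then applies the supplied rates. A usage sketch (model choice and rates are illustrative):

import litellm

cost = litellm.completion_cost(
    model="gpt-3.5-turbo",  # used here only to pick the tokenizer
    prompt="Write a short poem about the sky",
    completion="A canvas of blue, a",
    custom_cost_per_token={
        "input_cost_per_token": 1e-6,
        "output_cost_per_token": 2e-6,
    },
)
print(f"${cost:.8f}")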