Merge pull request #4295 from BerriAI/litellm_gemini_pricing_2

Vertex AI - character based cost calculation

Commit 71716bec48
5 changed files with 287 additions and 17 deletions
@@ -6,6 +6,9 @@ from typing import List, Literal, Optional, Tuple, Union
 import litellm
 import litellm._logging
 from litellm import verbose_logger
+from litellm.litellm_core_utils.llm_cost_calc.google import (
+    cost_per_character as google_cost_per_character,
+)
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
@@ -23,8 +26,8 @@ from litellm.utils import (


 def _cost_per_token_custom_pricing_helper(
-    prompt_tokens=0,
-    completion_tokens=0,
+    prompt_tokens: float = 0,
+    completion_tokens: float = 0,
     response_time_ms=None,
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
@@ -52,6 +55,9 @@ def cost_per_token(
     response_time_ms=None,
     custom_llm_provider: Optional[str] = None,
     region_name=None,
+    ### CHARACTER PRICING ###
+    prompt_characters: float = 0,
+    completion_characters: float = 0,
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
     custom_cost_per_second: Optional[float] = None,
@@ -64,6 +70,8 @@ def cost_per_token(
         prompt_tokens (int): The number of tokens in the prompt.
         completion_tokens (int): The number of tokens in the completion.
         response_time (float): The amount of time, in milliseconds, it took the call to complete.
+        prompt_characters (float): The number of characters in the prompt. Used for vertex ai cost calculation.
+        completion_characters (float): The number of characters in the completion response. Used for vertex ai cost calculation.
         custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
         custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
         custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
@@ -127,7 +135,16 @@ def cost_per_token(

     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
-    if custom_llm_provider == "vertex_ai" or custom_llm_provider == "gemini":
+    if custom_llm_provider == "vertex_ai":
+        return google_cost_per_character(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+    elif custom_llm_provider == "gemini":
         return google_cost_per_token(
             model=model_without_prefix,
             custom_llm_provider=custom_llm_provider,
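With this dispatch in place, a Vertex AI call is priced from character counts while a Google AI Studio (gemini) call keeps the token-based path. A hedged usage sketch, assuming the top-level `litellm.cost_per_token` export forwards these keyword arguments unchanged (model name and counts below are illustrative):

```python
import litellm

# Hedged sketch: price a Vertex AI completion from character counts.
prompt_cost, completion_cost = litellm.cost_per_token(
    model="gemini-1.5-flash",
    custom_llm_provider="vertex_ai",   # routes to google_cost_per_character
    prompt_tokens=10,
    completion_tokens=20,
    prompt_characters=38,              # whitespace-stripped prompt length
    completion_characters=75,          # whitespace-stripped response length
)
print(prompt_cost, completion_cost)
```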
@@ -401,7 +418,9 @@ def completion_cost(
             model = "dall-e-2"  # for dall-e-2, azure expects an empty model name
         # Handle Inputs to completion_cost
         prompt_tokens = 0
+        prompt_characters = 0
         completion_tokens = 0
+        completion_characters = 0
         custom_llm_provider = None
         if completion_response is not None:
             # get input/output tokens from completion_response
@@ -518,6 +537,30 @@ def completion_cost(
                 f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
             )

+        if (
+            custom_llm_provider is not None
+            and custom_llm_provider == "vertex_ai"
+            and completion_response is not None
+            and isinstance(completion_response, ModelResponse)
+        ):
+            # Calculate the prompt characters + response characters
+            if len("messages") > 0:
+                prompt_string = litellm.utils.get_formatted_prompt(
+                    data={"messages": messages}, call_type="completion"
+                )
+            else:
+                prompt_string = ""
+
+            prompt_characters = litellm.utils._count_characters(text=prompt_string)
+
+            completion_string = litellm.utils.get_response_string(
+                response_obj=completion_response
+            )
+
+            completion_characters = litellm.utils._count_characters(
+                text=completion_string
+            )
+
         (
             prompt_tokens_cost_usd_dollar,
             completion_tokens_cost_usd_dollar,
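The new branch derives both character counts from strings rather than token counts: the input messages are flattened with `get_formatted_prompt`, the response text with `get_response_string`, and both are measured with the whitespace-stripping `_count_characters`. (As written, `len("messages") > 0` tests the literal string, so the formatting branch is effectively always taken.) A minimal sketch of the same flow, with an illustrative message:

```python
# Minimal sketch of the character-counting flow (values are illustrative).
messages = [{"role": "user", "content": "What is the capital of France?"}]

# get_formatted_prompt(data={"messages": messages}, call_type="completion")
# concatenates the text content of each message:
prompt_string = "What is the capital of France?"

# _count_characters strips whitespace before counting:
prompt_characters = len("".join(prompt_string.split()))          # -> 25

# get_response_string joins choice.message.content across choices:
completion_string = "The capital of France is Paris."
completion_characters = len("".join(completion_string.split()))  # -> 26
```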
@@ -530,6 +573,8 @@ def completion_cost(
             region_name=region_name,
             custom_cost_per_second=custom_cost_per_second,
             custom_cost_per_token=custom_cost_per_token,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
         )
         _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
         print_verbose(
@@ -1,8 +1,10 @@
 # What is this?
 ## Cost calculation for Google AI Studio / Vertex AI models
-from typing import Literal, Tuple
+import traceback
+from typing import List, Literal, Optional, Tuple

 import litellm
+from litellm import verbose_logger

 """
 Gemini pricing covers:
@@ -12,6 +14,12 @@ Gemini pricing covers:
 - video
 """

+"""
+Vertex AI -> character based pricing
+
+Google AI Studio -> token based pricing
+"""
+
 models_without_dynamic_pricing = ["gemini-1.0-pro", "gemini-pro"]

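The split called out in this docstring is the core of the PR: Vertex AI is billed on characters, Google AI Studio (gemini) on tokens. A rough sketch of the difference, with invented rates:

```python
# Rough sketch of the two pricing modes (rates below are invented, not real pricing).
prompt = "Tell me a joke"
prompt_tokens = 4                                 # what a tokenizer might report
prompt_characters = len("".join(prompt.split()))  # 11 non-whitespace characters

google_ai_studio_cost = prompt_tokens * 0.00000035     # gemini: per-token rate
vertex_ai_cost = prompt_characters * 0.000000125       # vertex_ai: per-character rate
```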
@@ -21,6 +29,124 @@ def _is_above_128k(tokens: float) -> bool:
     return False


+def cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_tokens: float,
+    completion_tokens: float,
+    prompt_characters: float,
+    completion_characters: float,
+) -> Tuple[float, float]:
+    """
+    Calculates the cost per character for a given VertexAI model, input messages, and response object.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+
+    Raises:
+        Exception if model requires >128k pricing, but model cost not mapped
+    """
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if (
+            _is_above_128k(tokens=prompt_characters * 4)  # 1 token = 4 char
+            and model not in models_without_dynamic_pricing
+        ):
+            ## check if character pricing, else default to token pricing
+            assert (
+                "input_cost_per_character_above_128k_tokens" in model_info
+                and model_info["input_cost_per_character_above_128k_tokens"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format(
+                model, model_info
+            )
+            prompt_cost = (
+                prompt_characters
+                * model_info["input_cost_per_character_above_128k_tokens"]
+            )
+        else:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            prompt_cost = prompt_characters * model_info["input_cost_per_character"]
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occured - {}\n{}\n\
+                Defaulting to (cost_per_token * 4) calculation for prompt_cost".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        initial_prompt_cost, _ = cost_per_token(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+
+        prompt_cost = initial_prompt_cost * 4
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if (
+            _is_above_128k(tokens=completion_characters * 4)  # 1 token = 4 char
+            and model not in models_without_dynamic_pricing
+        ):
+            assert (
+                "output_cost_per_character_above_128k_tokens" in model_info
+                and model_info["output_cost_per_character_above_128k_tokens"]
+                is not None
+            ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            completion_cost = (
+                completion_tokens
+                * model_info["output_cost_per_character_above_128k_tokens"]
+            )
+        else:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            completion_cost = (
+                completion_tokens * model_info["output_cost_per_character"]
+            )
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occured - {}\n{}\n\
+                Defaulting to (cost_per_token * 4) calculation for completion_cost".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        _, initial_completion_cost = cost_per_token(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+
+        completion_cost = initial_completion_cost * 4
+    return prompt_cost, completion_cost
+
+
 def cost_per_token(
     model: str,
     custom_llm_provider: str,
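To make the pricing arithmetic concrete, here is a worked example with hypothetical rates (not actual Vertex AI pricing). The >128k branch estimates a token count as `characters * 4`, per the inline "1 token = 4 char" comment, and any lookup failure falls back to the token-based price scaled by 4:

```python
# Worked example with hypothetical per-character rates (not real Vertex AI pricing).
input_cost_per_character = 0.000000125
prompt_characters = 1_000

# Below the 128k threshold -> flat per-character rate:
prompt_cost = prompt_characters * input_cost_per_character   # 0.000125 USD

# Fallback path sketched: if the character-pricing keys are missing, the function
# logs the error and uses cost_per_token(...)[0] * 4 for the prompt side instead.
hypothetical_token_cost = 0.00003                     # cost_per_token(...) result, invented
fallback_prompt_cost = hypothetical_token_cost * 4    # 0.00012 USD
```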
@@ -53,7 +179,8 @@ def cost_per_token(
         and model not in models_without_dynamic_pricing
     ):
         assert (
-            model_info["input_cost_per_token_above_128k_tokens"] is not None
+            "input_cost_per_token_above_128k_tokens" in model_info
+            and model_info["input_cost_per_token_above_128k_tokens"] is not None
         ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
             model, model_info
         )
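The extra `in model_info` membership test matters because indexing a missing key raises `KeyError` before the assert message can be produced; checking membership first keeps the intended `AssertionError` text. A tiny self-contained illustration (the dict below is hypothetical):

```python
# Hypothetical model-info entry that lacks >128k pricing:
info = {"input_cost_per_token": 0.000001}

# info["input_cost_per_token_above_128k_tokens"] alone would raise KeyError here;
# the membership check turns it into a readable AssertionError instead.
try:
    assert (
        "input_cost_per_token_above_128k_tokens" in info
        and info["input_cost_per_token_above_128k_tokens"] is not None
    ), "model info does not have pricing for > 128k tokens"
except AssertionError as e:
    print(e)
```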
@@ -69,7 +196,8 @@ def cost_per_token(
         and model not in models_without_dynamic_pricing
     ):
         assert (
-            model_info["output_cost_per_token_above_128k_tokens"] is not None
+            "output_cost_per_token_above_128k_tokens" in model_info
+            and model_info["output_cost_per_token_above_128k_tokens"] is not None
         ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
             model, model_info
         )
@@ -576,7 +576,7 @@ def test_together_ai_qwen_completion_cost():


 @pytest.mark.parametrize("above_128k", [False, True])
-@pytest.mark.parametrize("provider", ["vertex_ai", "gemini"])
+@pytest.mark.parametrize("provider", ["gemini"])
 def test_gemini_completion_cost(above_128k, provider):
     """
     Check if cost correctly calculated for gemini models based on context window
@@ -628,3 +628,35 @@ def test_gemini_completion_cost(above_128k, provider):

     assert calculated_input_cost == input_cost
     assert calculated_output_cost == output_cost
+
+
+def _count_characters(text):
+    # Remove white spaces and count characters
+    filtered_text = "".join(char for char in text if not char.isspace())
+    return len(filtered_text)
+
+
+def test_vertex_ai_completion_cost():
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    text = "The quick brown fox jumps over the lazy dog."
+    characters = _count_characters(text=text)
+
+    model_info = litellm.get_model_info(model="gemini-1.5-flash")
+
+    print("\nExpected model info:\n{}\n\n".format(model_info))
+
+    expected_input_cost = characters * model_info["input_cost_per_character"]
+
+    ## CALCULATED COST
+    calculated_input_cost, calculated_output_cost = cost_per_token(
+        model="gemini-1.5-flash",
+        custom_llm_provider="vertex_ai",
+        prompt_characters=characters,
+        completion_characters=0,
+    )
+
+    assert round(expected_input_cost, 6) == round(calculated_input_cost, 6)
+    print("expected_input_cost: {}".format(expected_input_cost))
+    print("calculated_input_cost: {}".format(calculated_input_cost))
@@ -44,15 +44,25 @@ class ModelInfo(TypedDict, total=False):
     max_input_tokens: Required[Optional[int]]
     max_output_tokens: Required[Optional[int]]
     input_cost_per_token: Required[float]
-    input_cost_per_token_above_128k_tokens: Optional[float]
-    input_cost_per_image: Optional[float]
-    input_cost_per_audio_per_second: Optional[float]
-    input_cost_per_video_per_second: Optional[float]
+    input_cost_per_character: Optional[float]  # only for vertex ai models
+    input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
+    input_cost_per_character_above_128k_tokens: Optional[
+        float
+    ]  # only for vertex ai models
+    input_cost_per_image: Optional[float]  # only for vertex ai models
+    input_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
+    input_cost_per_video_per_second: Optional[float]  # only for vertex ai models
     output_cost_per_token: Required[float]
-    output_cost_per_token_above_128k_tokens: Optional[float]
+    output_cost_per_character: Optional[float]  # only for vertex ai models
+    output_cost_per_token_above_128k_tokens: Optional[
+        float
+    ]  # only for vertex ai models
+    output_cost_per_character_above_128k_tokens: Optional[
+        float
+    ]  # only for vertex ai models
     output_cost_per_image: Optional[float]
-    output_cost_per_video_per_second: Optional[float]
-    output_cost_per_audio_per_second: Optional[float]
+    output_cost_per_video_per_second: Optional[float]  # only for vertex ai models
+    output_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
     litellm_provider: Required[str]
     mode: Required[
         Literal[
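For orientation, a model-cost map entry for a Vertex AI chat model can now carry the character fields alongside the token ones. The values below are illustrative only, not actual Vertex AI pricing:

```python
# Hypothetical entry shaped like ModelInfo with the new character-based fields
# (numbers are invented for illustration):
model_cost_entry = {
    "max_input_tokens": 1_000_000,
    "max_output_tokens": 8192,
    "input_cost_per_token": 0.0000005,
    "input_cost_per_character": 0.000000125,
    "input_cost_per_character_above_128k_tokens": 0.00000025,
    "output_cost_per_token": 0.0000015,
    "output_cost_per_character": 0.000000375,
    "output_cost_per_character_above_128k_tokens": 0.00000075,
    "litellm_provider": "vertex_ai-language-models",
    "mode": "chat",
}
```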
@@ -3811,6 +3811,12 @@ def get_supported_openai_params(
     return None


+def _count_characters(text: str) -> int:
+    # Remove white spaces and count characters
+    filtered_text = "".join(char for char in text if not char.isspace())
+    return len(filtered_text)
+
+
 def get_formatted_prompt(
     data: dict,
     call_type: Literal[
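Since `_count_characters` drops all whitespace before counting, a short sanity check (assuming the helper above is in scope):

```python
# Whitespace is stripped before counting: 19 raw characters, 16 counted.
assert _count_characters("The quick brown fox") == 16
```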
@@ -3829,9 +3835,20 @@ def get_formatted_prompt(
     """
     prompt = ""
     if call_type == "completion":
-        for m in data["messages"]:
-            if "content" in m and isinstance(m["content"], str):
-                prompt += m["content"]
+        for message in data["messages"]:
+            if message.get("content", None) is not None:
+                content = message.get("content")
+                if isinstance(content, str):
+                    prompt += message["content"]
+                elif isinstance(content, List):
+                    for c in content:
+                        if c["type"] == "text":
+                            prompt += c["text"]
+            if "tool_calls" in message:
+                for tool_call in message["tool_calls"]:
+                    if "function" in tool_call:
+                        function_arguments = tool_call["function"]["arguments"]
+                        prompt += function_arguments
     elif call_type == "text_completion":
         prompt = data["prompt"]
     elif call_type == "embedding" or call_type == "moderation":
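The rewritten loop now handles content given as a list of parts and concatenates tool-call arguments as well. A hedged sketch of what it would produce for an OpenAI-style message list (the shapes and strings below are illustrative):

```python
# Illustrative messages: one list-of-parts content, one tool call with no content.
messages = [
    {
        "role": "user",
        "content": [{"type": "text", "text": "What's the weather in SF?"}],
    },
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {"function": {"name": "get_weather", "arguments": '{"city": "SF"}'}}
        ],
    },
]

# get_formatted_prompt(data={"messages": messages}, call_type="completion")
# would concatenate the text part and the tool-call arguments:
expected_prompt = "What's the weather in SF?" + '{"city": "SF"}'
```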
@@ -3848,6 +3865,17 @@ def get_formatted_prompt(
     return prompt


+def get_response_string(response_obj: ModelResponse) -> str:
+    _choices: List[Choices] = response_obj.choices  # type: ignore
+
+    response_str = ""
+    for choice in _choices:
+        if choice.message.content is not None:
+            response_str += choice.message.content
+
+    return response_str
+
+
 def _is_non_openai_azure_model(model: str) -> bool:
     try:
         model_name = model.split("/", 1)[1]
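`get_response_string` simply joins the non-None `message.content` of every choice. A minimal stand-in (using `SimpleNamespace` rather than litellm's response classes) to show the behaviour:

```python
from types import SimpleNamespace

# Stand-in objects mimicking response_obj.choices[i].message.content:
response = SimpleNamespace(
    choices=[
        SimpleNamespace(message=SimpleNamespace(content="The capital of France ")),
        SimpleNamespace(message=SimpleNamespace(content=None)),
        SimpleNamespace(message=SimpleNamespace(content="is Paris.")),
    ]
)

response_str = "".join(
    c.message.content for c in response.choices if c.message.content is not None
)
assert response_str == "The capital of France is Paris."
```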
@@ -4394,13 +4422,22 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
                 max_input_tokens=_model_info.get("max_input_tokens", None),
                 max_output_tokens=_model_info.get("max_output_tokens", None),
                 input_cost_per_token=_model_info.get("input_cost_per_token", 0),
+                input_cost_per_character=_model_info.get(
+                    "input_cost_per_character", None
+                ),
                 input_cost_per_token_above_128k_tokens=_model_info.get(
                     "input_cost_per_token_above_128k_tokens", None
                 ),
                 output_cost_per_token=_model_info.get("output_cost_per_token", 0),
+                output_cost_per_character=_model_info.get(
+                    "output_cost_per_character", None
+                ),
                 output_cost_per_token_above_128k_tokens=_model_info.get(
                     "output_cost_per_token_above_128k_tokens", None
                 ),
+                output_cost_per_character_above_128k_tokens=_model_info.get(
+                    "output_cost_per_character_above_128k_tokens", None
+                ),
                 litellm_provider=_model_info.get(
                     "litellm_provider", custom_llm_provider
                 ),
@@ -4428,13 +4465,22 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
                 max_input_tokens=_model_info.get("max_input_tokens", None),
                 max_output_tokens=_model_info.get("max_output_tokens", None),
                 input_cost_per_token=_model_info.get("input_cost_per_token", 0),
+                input_cost_per_character=_model_info.get(
+                    "input_cost_per_character", None
+                ),
                 input_cost_per_token_above_128k_tokens=_model_info.get(
                     "input_cost_per_token_above_128k_tokens", None
                 ),
                 output_cost_per_token=_model_info.get("output_cost_per_token", 0),
+                output_cost_per_character=_model_info.get(
+                    "output_cost_per_character", None
+                ),
                 output_cost_per_token_above_128k_tokens=_model_info.get(
                     "output_cost_per_token_above_128k_tokens", None
                 ),
+                output_cost_per_character_above_128k_tokens=_model_info.get(
+                    "output_cost_per_character_above_128k_tokens", None
+                ),
                 litellm_provider=_model_info.get(
                     "litellm_provider", custom_llm_provider
                 ),
@@ -4462,13 +4508,22 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
                 max_input_tokens=_model_info.get("max_input_tokens", None),
                 max_output_tokens=_model_info.get("max_output_tokens", None),
                 input_cost_per_token=_model_info.get("input_cost_per_token", 0),
+                input_cost_per_character=_model_info.get(
+                    "input_cost_per_character", None
+                ),
                 input_cost_per_token_above_128k_tokens=_model_info.get(
                     "input_cost_per_token_above_128k_tokens", None
                 ),
                 output_cost_per_token=_model_info.get("output_cost_per_token", 0),
+                output_cost_per_character=_model_info.get(
+                    "output_cost_per_character", None
+                ),
                 output_cost_per_token_above_128k_tokens=_model_info.get(
                     "output_cost_per_token_above_128k_tokens", None
                 ),
+                output_cost_per_character_above_128k_tokens=_model_info.get(
+                    "output_cost_per_character_above_128k_tokens", None
+                ),
                 litellm_provider=_model_info.get(
                     "litellm_provider", custom_llm_provider
                 ),