mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
Merge branch 'main' into litellm_fix_in_mem_usage
This commit is contained in:
commit
3bcf9dd9fb
8 changed files with 267 additions and 89 deletions
|
@ -101,8 +101,12 @@ def cost_per_token(
|
||||||
if custom_llm_provider is not None:
|
if custom_llm_provider is not None:
|
||||||
model_with_provider = custom_llm_provider + "/" + model
|
model_with_provider = custom_llm_provider + "/" + model
|
||||||
if region_name is not None:
|
if region_name is not None:
|
||||||
model_with_provider_and_region = f"{custom_llm_provider}/{region_name}/{model}"
|
model_with_provider_and_region = (
|
||||||
if model_with_provider_and_region in model_cost_ref: # use region based pricing, if it's available
|
f"{custom_llm_provider}/{region_name}/{model}"
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
model_with_provider_and_region in model_cost_ref
|
||||||
|
): # use region based pricing, if it's available
|
||||||
model_with_provider = model_with_provider_and_region
|
model_with_provider = model_with_provider_and_region
|
||||||
else:
|
else:
|
||||||
_, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
|
_, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
|
||||||
|
@ -118,7 +122,9 @@ def cost_per_token(
|
||||||
Option2. model = "openai/gpt-4" - model = provider/model
|
Option2. model = "openai/gpt-4" - model = provider/model
|
||||||
Option3. model = "anthropic.claude-3" - model = model
|
Option3. model = "anthropic.claude-3" - model = model
|
||||||
"""
|
"""
|
||||||
if model_with_provider in model_cost_ref: # Option 2. use model with provider, model = "openai/gpt-4"
|
if (
|
||||||
|
model_with_provider in model_cost_ref
|
||||||
|
): # Option 2. use model with provider, model = "openai/gpt-4"
|
||||||
model = model_with_provider
|
model = model_with_provider
|
||||||
elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4"
|
elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4"
|
||||||
model = model
|
model = model
|
||||||
|
@ -154,29 +160,45 @@ def cost_per_token(
|
||||||
)
|
)
|
||||||
elif model in model_cost_ref:
|
elif model in model_cost_ref:
|
||||||
print_verbose(f"Success: model={model} in model_cost_map")
|
print_verbose(f"Success: model={model} in model_cost_map")
|
||||||
print_verbose(f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}")
|
print_verbose(
|
||||||
|
f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}"
|
||||||
|
)
|
||||||
if (
|
if (
|
||||||
model_cost_ref[model].get("input_cost_per_token", None) is not None
|
model_cost_ref[model].get("input_cost_per_token", None) is not None
|
||||||
and model_cost_ref[model].get("output_cost_per_token", None) is not None
|
and model_cost_ref[model].get("output_cost_per_token", None) is not None
|
||||||
):
|
):
|
||||||
## COST PER TOKEN ##
|
## COST PER TOKEN ##
|
||||||
prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
prompt_tokens_cost_usd_dollar = (
|
||||||
completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens
|
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
||||||
elif model_cost_ref[model].get("output_cost_per_second", None) is not None and response_time_ms is not None:
|
)
|
||||||
|
completion_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
|
||||||
|
)
|
||||||
|
elif (
|
||||||
|
model_cost_ref[model].get("output_cost_per_second", None) is not None
|
||||||
|
and response_time_ms is not None
|
||||||
|
):
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}"
|
f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}"
|
||||||
)
|
)
|
||||||
## COST PER SECOND ##
|
## COST PER SECOND ##
|
||||||
prompt_tokens_cost_usd_dollar = 0
|
prompt_tokens_cost_usd_dollar = 0
|
||||||
completion_tokens_cost_usd_dollar = (
|
completion_tokens_cost_usd_dollar = (
|
||||||
model_cost_ref[model]["output_cost_per_second"] * response_time_ms / 1000
|
model_cost_ref[model]["output_cost_per_second"]
|
||||||
|
* response_time_ms
|
||||||
|
/ 1000
|
||||||
)
|
)
|
||||||
elif model_cost_ref[model].get("input_cost_per_second", None) is not None and response_time_ms is not None:
|
elif (
|
||||||
|
model_cost_ref[model].get("input_cost_per_second", None) is not None
|
||||||
|
and response_time_ms is not None
|
||||||
|
):
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}"
|
f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}"
|
||||||
)
|
)
|
||||||
## COST PER SECOND ##
|
## COST PER SECOND ##
|
||||||
prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000
|
prompt_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000
|
||||||
|
)
|
||||||
completion_tokens_cost_usd_dollar = 0.0
|
completion_tokens_cost_usd_dollar = 0.0
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
|
f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
|
||||||
|
@ -185,40 +207,57 @@ def cost_per_token(
|
||||||
elif "ft:gpt-3.5-turbo" in model:
|
elif "ft:gpt-3.5-turbo" in model:
|
||||||
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
||||||
# fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
|
# fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
|
||||||
prompt_tokens_cost_usd_dollar = model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
|
prompt_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
|
||||||
|
)
|
||||||
completion_tokens_cost_usd_dollar = (
|
completion_tokens_cost_usd_dollar = (
|
||||||
model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] * completion_tokens
|
model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"]
|
||||||
|
* completion_tokens
|
||||||
)
|
)
|
||||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||||
elif "ft:gpt-4-0613" in model:
|
elif "ft:gpt-4-0613" in model:
|
||||||
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
||||||
# fuzzy match ft:gpt-4-0613:abcd-id-cool-litellm
|
# fuzzy match ft:gpt-4-0613:abcd-id-cool-litellm
|
||||||
prompt_tokens_cost_usd_dollar = model_cost_ref["ft:gpt-4-0613"]["input_cost_per_token"] * prompt_tokens
|
prompt_tokens_cost_usd_dollar = (
|
||||||
completion_tokens_cost_usd_dollar = model_cost_ref["ft:gpt-4-0613"]["output_cost_per_token"] * completion_tokens
|
model_cost_ref["ft:gpt-4-0613"]["input_cost_per_token"] * prompt_tokens
|
||||||
|
)
|
||||||
|
completion_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref["ft:gpt-4-0613"]["output_cost_per_token"] * completion_tokens
|
||||||
|
)
|
||||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||||
elif "ft:gpt-4o-2024-05-13" in model:
|
elif "ft:gpt-4o-2024-05-13" in model:
|
||||||
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
||||||
# fuzzy match ft:gpt-4o-2024-05-13:abcd-id-cool-litellm
|
# fuzzy match ft:gpt-4o-2024-05-13:abcd-id-cool-litellm
|
||||||
prompt_tokens_cost_usd_dollar = model_cost_ref["ft:gpt-4o-2024-05-13"]["input_cost_per_token"] * prompt_tokens
|
prompt_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref["ft:gpt-4o-2024-05-13"]["input_cost_per_token"]
|
||||||
|
* prompt_tokens
|
||||||
|
)
|
||||||
completion_tokens_cost_usd_dollar = (
|
completion_tokens_cost_usd_dollar = (
|
||||||
model_cost_ref["ft:gpt-4o-2024-05-13"]["output_cost_per_token"] * completion_tokens
|
model_cost_ref["ft:gpt-4o-2024-05-13"]["output_cost_per_token"]
|
||||||
|
* completion_tokens
|
||||||
)
|
)
|
||||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||||
|
|
||||||
elif "ft:davinci-002" in model:
|
elif "ft:davinci-002" in model:
|
||||||
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
||||||
# fuzzy match ft:davinci-002:abcd-id-cool-litellm
|
# fuzzy match ft:davinci-002:abcd-id-cool-litellm
|
||||||
prompt_tokens_cost_usd_dollar = model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens
|
prompt_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens
|
||||||
|
)
|
||||||
completion_tokens_cost_usd_dollar = (
|
completion_tokens_cost_usd_dollar = (
|
||||||
model_cost_ref["ft:davinci-002"]["output_cost_per_token"] * completion_tokens
|
model_cost_ref["ft:davinci-002"]["output_cost_per_token"]
|
||||||
|
* completion_tokens
|
||||||
)
|
)
|
||||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||||
elif "ft:babbage-002" in model:
|
elif "ft:babbage-002" in model:
|
||||||
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
||||||
# fuzzy match ft:babbage-002:abcd-id-cool-litellm
|
# fuzzy match ft:babbage-002:abcd-id-cool-litellm
|
||||||
prompt_tokens_cost_usd_dollar = model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens
|
prompt_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens
|
||||||
|
)
|
||||||
completion_tokens_cost_usd_dollar = (
|
completion_tokens_cost_usd_dollar = (
|
||||||
model_cost_ref["ft:babbage-002"]["output_cost_per_token"] * completion_tokens
|
model_cost_ref["ft:babbage-002"]["output_cost_per_token"]
|
||||||
|
* completion_tokens
|
||||||
)
|
)
|
||||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||||
elif model in litellm.azure_llms:
|
elif model in litellm.azure_llms:
|
||||||
|
@ -227,17 +266,25 @@ def cost_per_token(
|
||||||
verbose_logger.debug(
|
verbose_logger.debug(
|
||||||
f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
|
f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
|
||||||
)
|
)
|
||||||
prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
prompt_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
||||||
|
)
|
||||||
verbose_logger.debug(
|
verbose_logger.debug(
|
||||||
f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
|
f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
|
||||||
)
|
)
|
||||||
completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens
|
completion_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
|
||||||
|
)
|
||||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||||
elif model in litellm.azure_embedding_models:
|
elif model in litellm.azure_embedding_models:
|
||||||
verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
|
verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
|
||||||
model = litellm.azure_embedding_models[model]
|
model = litellm.azure_embedding_models[model]
|
||||||
prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
prompt_tokens_cost_usd_dollar = (
|
||||||
completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens
|
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
||||||
|
)
|
||||||
|
completion_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
|
||||||
|
)
|
||||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||||
else:
|
else:
|
||||||
# if model is not in model_prices_and_context_window.json. Raise an exception-let users know
|
# if model is not in model_prices_and_context_window.json. Raise an exception-let users know
|
||||||
|
@ -261,7 +308,9 @@ def get_model_params_and_category(model_name) -> str:
|
||||||
import re
|
import re
|
||||||
|
|
||||||
model_name = model_name.lower()
|
model_name = model_name.lower()
|
||||||
re_params_match = re.search(r"(\d+b)", model_name) # catch all decimals like 3b, 70b, etc
|
re_params_match = re.search(
|
||||||
|
r"(\d+b)", model_name
|
||||||
|
) # catch all decimals like 3b, 70b, etc
|
||||||
category = None
|
category = None
|
||||||
if re_params_match is not None:
|
if re_params_match is not None:
|
||||||
params_match = str(re_params_match.group(1))
|
params_match = str(re_params_match.group(1))
|
||||||
|
@ -292,7 +341,9 @@ def get_model_params_and_category(model_name) -> str:
|
||||||
def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
|
def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
|
||||||
# see https://replicate.com/pricing
|
# see https://replicate.com/pricing
|
||||||
# for all litellm currently supported LLMs, almost all requests go to a100_80gb
|
# for all litellm currently supported LLMs, almost all requests go to a100_80gb
|
||||||
a100_80gb_price_per_second_public = 0.001400 # assume all calls sent to A100 80GB for now
|
a100_80gb_price_per_second_public = (
|
||||||
|
0.001400 # assume all calls sent to A100 80GB for now
|
||||||
|
)
|
||||||
if total_time == 0.0: # total time is in ms
|
if total_time == 0.0: # total time is in ms
|
||||||
start_time = completion_response["created"]
|
start_time = completion_response["created"]
|
||||||
end_time = getattr(completion_response, "ended", time.time())
|
end_time = getattr(completion_response, "ended", time.time())
|
||||||
|
@ -377,13 +428,16 @@ def completion_cost(
|
||||||
prompt_characters = 0
|
prompt_characters = 0
|
||||||
completion_tokens = 0
|
completion_tokens = 0
|
||||||
completion_characters = 0
|
completion_characters = 0
|
||||||
custom_llm_provider = None
|
|
||||||
if completion_response is not None:
|
if completion_response is not None:
|
||||||
# get input/output tokens from completion_response
|
# get input/output tokens from completion_response
|
||||||
prompt_tokens = completion_response.get("usage", {}).get("prompt_tokens", 0)
|
prompt_tokens = completion_response.get("usage", {}).get("prompt_tokens", 0)
|
||||||
completion_tokens = completion_response.get("usage", {}).get("completion_tokens", 0)
|
completion_tokens = completion_response.get("usage", {}).get(
|
||||||
|
"completion_tokens", 0
|
||||||
|
)
|
||||||
total_time = completion_response.get("_response_ms", 0)
|
total_time = completion_response.get("_response_ms", 0)
|
||||||
verbose_logger.debug(f"completion_response response ms: {completion_response.get('_response_ms')} ")
|
verbose_logger.debug(
|
||||||
|
f"completion_response response ms: {completion_response.get('_response_ms')} "
|
||||||
|
)
|
||||||
model = model or completion_response.get(
|
model = model or completion_response.get(
|
||||||
"model", None
|
"model", None
|
||||||
) # check if user passed an override for model, if it's none check completion_response['model']
|
) # check if user passed an override for model, if it's none check completion_response['model']
|
||||||
|
@ -393,16 +447,30 @@ def completion_cost(
|
||||||
and len(completion_response._hidden_params["model"]) > 0
|
and len(completion_response._hidden_params["model"]) > 0
|
||||||
):
|
):
|
||||||
model = completion_response._hidden_params.get("model", model)
|
model = completion_response._hidden_params.get("model", model)
|
||||||
custom_llm_provider = completion_response._hidden_params.get("custom_llm_provider", "")
|
custom_llm_provider = completion_response._hidden_params.get(
|
||||||
region_name = completion_response._hidden_params.get("region_name", region_name)
|
"custom_llm_provider", ""
|
||||||
size = completion_response._hidden_params.get("optional_params", {}).get(
|
)
|
||||||
|
region_name = completion_response._hidden_params.get(
|
||||||
|
"region_name", region_name
|
||||||
|
)
|
||||||
|
size = completion_response._hidden_params.get(
|
||||||
|
"optional_params", {}
|
||||||
|
).get(
|
||||||
"size", "1024-x-1024"
|
"size", "1024-x-1024"
|
||||||
) # openai default
|
) # openai default
|
||||||
quality = completion_response._hidden_params.get("optional_params", {}).get(
|
quality = completion_response._hidden_params.get(
|
||||||
|
"optional_params", {}
|
||||||
|
).get(
|
||||||
"quality", "standard"
|
"quality", "standard"
|
||||||
) # openai default
|
) # openai default
|
||||||
n = completion_response._hidden_params.get("optional_params", {}).get("n", 1) # openai default
|
n = completion_response._hidden_params.get("optional_params", {}).get(
|
||||||
|
"n", 1
|
||||||
|
) # openai default
|
||||||
else:
|
else:
|
||||||
|
if model is None:
|
||||||
|
raise ValueError(
|
||||||
|
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
||||||
|
)
|
||||||
if len(messages) > 0:
|
if len(messages) > 0:
|
||||||
prompt_tokens = token_counter(model=model, messages=messages)
|
prompt_tokens = token_counter(model=model, messages=messages)
|
||||||
elif len(prompt) > 0:
|
elif len(prompt) > 0:
|
||||||
|
@ -413,7 +481,19 @@ def completion_cost(
|
||||||
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if call_type == CallTypes.image_generation.value or call_type == CallTypes.aimage_generation.value:
|
if custom_llm_provider is None:
|
||||||
|
try:
|
||||||
|
_, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
|
||||||
|
except Exception as e:
|
||||||
|
verbose_logger.error(
|
||||||
|
"litellm.cost_calculator.py::completion_cost() - Error inferring custom_llm_provider - {}".format(
|
||||||
|
str(e)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
call_type == CallTypes.image_generation.value
|
||||||
|
or call_type == CallTypes.aimage_generation.value
|
||||||
|
):
|
||||||
### IMAGE GENERATION COST CALCULATION ###
|
### IMAGE GENERATION COST CALCULATION ###
|
||||||
if custom_llm_provider == "vertex_ai":
|
if custom_llm_provider == "vertex_ai":
|
||||||
# https://cloud.google.com/vertex-ai/generative-ai/pricing
|
# https://cloud.google.com/vertex-ai/generative-ai/pricing
|
||||||
|
@ -431,23 +511,43 @@ def completion_cost(
|
||||||
height = int(size[0]) # if it's 1024-x-1024 vs. 1024x1024
|
height = int(size[0]) # if it's 1024-x-1024 vs. 1024x1024
|
||||||
width = int(size[1])
|
width = int(size[1])
|
||||||
verbose_logger.debug(f"image_gen_model_name: {image_gen_model_name}")
|
verbose_logger.debug(f"image_gen_model_name: {image_gen_model_name}")
|
||||||
verbose_logger.debug(f"image_gen_model_name_with_quality: {image_gen_model_name_with_quality}")
|
verbose_logger.debug(
|
||||||
|
f"image_gen_model_name_with_quality: {image_gen_model_name_with_quality}"
|
||||||
|
)
|
||||||
if image_gen_model_name in litellm.model_cost:
|
if image_gen_model_name in litellm.model_cost:
|
||||||
return litellm.model_cost[image_gen_model_name]["input_cost_per_pixel"] * height * width * n
|
return (
|
||||||
|
litellm.model_cost[image_gen_model_name]["input_cost_per_pixel"]
|
||||||
|
* height
|
||||||
|
* width
|
||||||
|
* n
|
||||||
|
)
|
||||||
elif image_gen_model_name_with_quality in litellm.model_cost:
|
elif image_gen_model_name_with_quality in litellm.model_cost:
|
||||||
return (
|
return (
|
||||||
litellm.model_cost[image_gen_model_name_with_quality]["input_cost_per_pixel"] * height * width * n
|
litellm.model_cost[image_gen_model_name_with_quality][
|
||||||
|
"input_cost_per_pixel"
|
||||||
|
]
|
||||||
|
* height
|
||||||
|
* width
|
||||||
|
* n
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Model={image_gen_model_name} not found in completion cost model map")
|
raise Exception(
|
||||||
|
f"Model={image_gen_model_name} not found in completion cost model map"
|
||||||
|
)
|
||||||
# Calculate cost based on prompt_tokens, completion_tokens
|
# Calculate cost based on prompt_tokens, completion_tokens
|
||||||
if "togethercomputer" in model or "together_ai" in model or custom_llm_provider == "together_ai":
|
if (
|
||||||
|
"togethercomputer" in model
|
||||||
|
or "together_ai" in model
|
||||||
|
or custom_llm_provider == "together_ai"
|
||||||
|
):
|
||||||
# together ai prices based on size of llm
|
# together ai prices based on size of llm
|
||||||
# get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
|
# get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
|
||||||
model = get_model_params_and_category(model)
|
model = get_model_params_and_category(model)
|
||||||
# replicate llms are calculate based on time for request running
|
# replicate llms are calculate based on time for request running
|
||||||
# see https://replicate.com/pricing
|
# see https://replicate.com/pricing
|
||||||
elif (model in litellm.replicate_models or "replicate" in model) and model not in litellm.model_cost:
|
elif (
|
||||||
|
model in litellm.replicate_models or "replicate" in model
|
||||||
|
) and model not in litellm.model_cost:
|
||||||
# for unmapped replicate model, default to replicate's time tracking logic
|
# for unmapped replicate model, default to replicate's time tracking logic
|
||||||
return get_replicate_completion_pricing(completion_response, total_time)
|
return get_replicate_completion_pricing(completion_response, total_time)
|
||||||
|
|
||||||
|
@ -456,23 +556,26 @@ def completion_cost(
|
||||||
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
|
||||||
custom_llm_provider is not None
|
|
||||||
and custom_llm_provider == "vertex_ai"
|
|
||||||
and completion_response is not None
|
|
||||||
and isinstance(completion_response, ModelResponse)
|
|
||||||
):
|
|
||||||
# Calculate the prompt characters + response characters
|
# Calculate the prompt characters + response characters
|
||||||
if len("messages") > 0:
|
if len("messages") > 0:
|
||||||
prompt_string = litellm.utils.get_formatted_prompt(data={"messages": messages}, call_type="completion")
|
prompt_string = litellm.utils.get_formatted_prompt(
|
||||||
|
data={"messages": messages}, call_type="completion"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
prompt_string = ""
|
prompt_string = ""
|
||||||
|
|
||||||
prompt_characters = litellm.utils._count_characters(text=prompt_string)
|
prompt_characters = litellm.utils._count_characters(text=prompt_string)
|
||||||
|
if completion_response is not None and isinstance(
|
||||||
|
completion_response, ModelResponse
|
||||||
|
):
|
||||||
|
completion_string = litellm.utils.get_response_string(
|
||||||
|
response_obj=completion_response
|
||||||
|
)
|
||||||
|
|
||||||
completion_string = litellm.utils.get_response_string(response_obj=completion_response)
|
completion_characters = litellm.utils._count_characters(
|
||||||
|
text=completion_string
|
||||||
completion_characters = litellm.utils._count_characters(text=completion_string)
|
)
|
||||||
|
|
||||||
(
|
(
|
||||||
prompt_tokens_cost_usd_dollar,
|
prompt_tokens_cost_usd_dollar,
|
||||||
|
@ -544,7 +647,9 @@ def response_cost_calculator(
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
if (
|
if (
|
||||||
model in litellm.model_cost and custom_pricing is not None and custom_llm_provider is True
|
model in litellm.model_cost
|
||||||
|
and custom_pricing is not None
|
||||||
|
and custom_llm_provider is True
|
||||||
): # override defaults if custom pricing is set
|
): # override defaults if custom pricing is set
|
||||||
base_model = model
|
base_model = model
|
||||||
# base_model defaults to None if not set on model_info
|
# base_model defaults to None if not set on model_info
|
||||||
|
@ -556,5 +661,7 @@ def response_cost_calculator(
|
||||||
)
|
)
|
||||||
return response_cost
|
return response_cost
|
||||||
except litellm.NotFoundError as e:
|
except litellm.NotFoundError as e:
|
||||||
print_verbose(f"Model={model} for LLM Provider={custom_llm_provider} not found in completion cost map.")
|
print_verbose(
|
||||||
|
f"Model={model} for LLM Provider={custom_llm_provider} not found in completion cost map."
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -660,8 +660,16 @@ class AzureChatCompletion(BaseLLM):
|
||||||
response = await azure_client.chat.completions.create(
|
response = await azure_client.chat.completions.create(
|
||||||
**data, timeout=timeout
|
**data, timeout=timeout
|
||||||
)
|
)
|
||||||
|
|
||||||
|
stringified_response = response.model_dump()
|
||||||
|
logging_obj.post_call(
|
||||||
|
input=data["messages"],
|
||||||
|
api_key=api_key,
|
||||||
|
original_response=stringified_response,
|
||||||
|
additional_args={"complete_input_dict": data},
|
||||||
|
)
|
||||||
return convert_to_model_response_object(
|
return convert_to_model_response_object(
|
||||||
response_object=response.model_dump(),
|
response_object=stringified_response,
|
||||||
model_response_object=model_response,
|
model_response_object=model_response,
|
||||||
)
|
)
|
||||||
except AzureOpenAIError as e:
|
except AzureOpenAIError as e:
|
||||||
|
|
|
@ -663,6 +663,10 @@ def convert_url_to_base64(url):
|
||||||
image_bytes = response.content
|
image_bytes = response.content
|
||||||
base64_image = base64.b64encode(image_bytes).decode("utf-8")
|
base64_image = base64.b64encode(image_bytes).decode("utf-8")
|
||||||
|
|
||||||
|
image_type = response.headers.get("Content-Type", None)
|
||||||
|
if image_type is not None and image_type.startswith("image/"):
|
||||||
|
img_type = image_type
|
||||||
|
else:
|
||||||
img_type = url.split(".")[-1].lower()
|
img_type = url.split(".")[-1].lower()
|
||||||
if img_type == "jpg" or img_type == "jpeg":
|
if img_type == "jpg" or img_type == "jpeg":
|
||||||
img_type = "image/jpeg"
|
img_type = "image/jpeg"
|
||||||
|
|
|
@ -1025,7 +1025,7 @@ def completion(
|
||||||
client=client, # pass AsyncAzureOpenAI, AzureOpenAI client
|
client=client, # pass AsyncAzureOpenAI, AzureOpenAI client
|
||||||
)
|
)
|
||||||
|
|
||||||
if optional_params.get("stream", False) or acompletion == True:
|
if optional_params.get("stream", False):
|
||||||
## LOGGING
|
## LOGGING
|
||||||
logging.post_call(
|
logging.post_call(
|
||||||
input=messages,
|
input=messages,
|
||||||
|
|
|
@ -175,8 +175,13 @@ async def add_litellm_data_to_request(
|
||||||
|
|
||||||
|
|
||||||
def _add_otel_traceparent_to_data(data: dict, request: Request):
|
def _add_otel_traceparent_to_data(data: dict, request: Request):
|
||||||
|
from litellm.proxy.proxy_server import open_telemetry_logger
|
||||||
if data is None:
|
if data is None:
|
||||||
return
|
return
|
||||||
|
if open_telemetry_logger is None:
|
||||||
|
# if user is not use OTEL don't send extra_headers
|
||||||
|
# relevant issue: https://github.com/BerriAI/litellm/issues/4448
|
||||||
|
return
|
||||||
if request.headers:
|
if request.headers:
|
||||||
if "traceparent" in request.headers:
|
if "traceparent" in request.headers:
|
||||||
# we want to forward this to the LLM Provider
|
# we want to forward this to the LLM Provider
|
||||||
|
|
|
@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
|
||||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||||
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
|
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
|
||||||
|
|
||||||
# litellm.num_retries=3
|
# litellm.num_retries = 3
|
||||||
litellm.cache = None
|
litellm.cache = None
|
||||||
litellm.success_callback = []
|
litellm.success_callback = []
|
||||||
user_message = "Write a short poem about the sky"
|
user_message = "Write a short poem about the sky"
|
||||||
|
|
|
@ -4,7 +4,9 @@ import traceback
|
||||||
|
|
||||||
import litellm.cost_calculator
|
import litellm.cost_calculator
|
||||||
|
|
||||||
sys.path.insert(0, os.path.abspath("../..")) # Adds the parent directory to the system path
|
sys.path.insert(
|
||||||
|
0, os.path.abspath("../..")
|
||||||
|
) # Adds the parent directory to the system path
|
||||||
import asyncio
|
import asyncio
|
||||||
import time
|
import time
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
@ -167,11 +169,15 @@ def test_cost_ft_gpt_35():
|
||||||
input_cost = model_cost["ft:gpt-3.5-turbo"]["input_cost_per_token"]
|
input_cost = model_cost["ft:gpt-3.5-turbo"]["input_cost_per_token"]
|
||||||
output_cost = model_cost["ft:gpt-3.5-turbo"]["output_cost_per_token"]
|
output_cost = model_cost["ft:gpt-3.5-turbo"]["output_cost_per_token"]
|
||||||
print(input_cost, output_cost)
|
print(input_cost, output_cost)
|
||||||
expected_cost = (input_cost * resp.usage.prompt_tokens) + (output_cost * resp.usage.completion_tokens)
|
expected_cost = (input_cost * resp.usage.prompt_tokens) + (
|
||||||
|
output_cost * resp.usage.completion_tokens
|
||||||
|
)
|
||||||
print("\n Excpected cost", expected_cost)
|
print("\n Excpected cost", expected_cost)
|
||||||
assert cost == expected_cost
|
assert cost == expected_cost
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Cost Calc failed for ft:gpt-3.5. Expected {expected_cost}, Calculated cost {cost}")
|
pytest.fail(
|
||||||
|
f"Cost Calc failed for ft:gpt-3.5. Expected {expected_cost}, Calculated cost {cost}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# test_cost_ft_gpt_35()
|
# test_cost_ft_gpt_35()
|
||||||
|
@ -200,15 +206,21 @@ def test_cost_azure_gpt_35():
|
||||||
usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38),
|
usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38),
|
||||||
)
|
)
|
||||||
|
|
||||||
cost = litellm.completion_cost(completion_response=resp, model="azure/gpt-35-turbo")
|
cost = litellm.completion_cost(
|
||||||
|
completion_response=resp, model="azure/gpt-35-turbo"
|
||||||
|
)
|
||||||
print("\n Calculated Cost for azure/gpt-3.5-turbo", cost)
|
print("\n Calculated Cost for azure/gpt-3.5-turbo", cost)
|
||||||
input_cost = model_cost["azure/gpt-35-turbo"]["input_cost_per_token"]
|
input_cost = model_cost["azure/gpt-35-turbo"]["input_cost_per_token"]
|
||||||
output_cost = model_cost["azure/gpt-35-turbo"]["output_cost_per_token"]
|
output_cost = model_cost["azure/gpt-35-turbo"]["output_cost_per_token"]
|
||||||
expected_cost = (input_cost * resp.usage.prompt_tokens) + (output_cost * resp.usage.completion_tokens)
|
expected_cost = (input_cost * resp.usage.prompt_tokens) + (
|
||||||
|
output_cost * resp.usage.completion_tokens
|
||||||
|
)
|
||||||
print("\n Excpected cost", expected_cost)
|
print("\n Excpected cost", expected_cost)
|
||||||
assert cost == expected_cost
|
assert cost == expected_cost
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Cost Calc failed for azure/gpt-3.5-turbo. Expected {expected_cost}, Calculated cost {cost}")
|
pytest.fail(
|
||||||
|
f"Cost Calc failed for azure/gpt-3.5-turbo. Expected {expected_cost}, Calculated cost {cost}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# test_cost_azure_gpt_35()
|
# test_cost_azure_gpt_35()
|
||||||
|
@ -239,7 +251,9 @@ def test_cost_azure_embedding():
|
||||||
assert cost == expected_cost
|
assert cost == expected_cost
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Cost Calc failed for azure/gpt-3.5-turbo. Expected {expected_cost}, Calculated cost {cost}")
|
pytest.fail(
|
||||||
|
f"Cost Calc failed for azure/gpt-3.5-turbo. Expected {expected_cost}, Calculated cost {cost}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# test_cost_azure_embedding()
|
# test_cost_azure_embedding()
|
||||||
|
@ -315,7 +329,9 @@ def test_cost_bedrock_pricing_actual_calls():
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
model = "anthropic.claude-instant-v1"
|
model = "anthropic.claude-instant-v1"
|
||||||
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||||
response = litellm.completion(model=model, messages=messages, mock_response="hello cool one")
|
response = litellm.completion(
|
||||||
|
model=model, messages=messages, mock_response="hello cool one"
|
||||||
|
)
|
||||||
|
|
||||||
print("response", response)
|
print("response", response)
|
||||||
cost = litellm.completion_cost(
|
cost = litellm.completion_cost(
|
||||||
|
@ -345,7 +361,8 @@ def test_whisper_openai():
|
||||||
print(f"cost: {cost}")
|
print(f"cost: {cost}")
|
||||||
print(f"whisper dict: {litellm.model_cost['whisper-1']}")
|
print(f"whisper dict: {litellm.model_cost['whisper-1']}")
|
||||||
expected_cost = round(
|
expected_cost = round(
|
||||||
litellm.model_cost["whisper-1"]["output_cost_per_second"] * _total_time_in_seconds,
|
litellm.model_cost["whisper-1"]["output_cost_per_second"]
|
||||||
|
* _total_time_in_seconds,
|
||||||
5,
|
5,
|
||||||
)
|
)
|
||||||
assert cost == expected_cost
|
assert cost == expected_cost
|
||||||
|
@ -365,12 +382,15 @@ def test_whisper_azure():
|
||||||
_total_time_in_seconds = 3
|
_total_time_in_seconds = 3
|
||||||
|
|
||||||
transcription._response_ms = _total_time_in_seconds * 1000
|
transcription._response_ms = _total_time_in_seconds * 1000
|
||||||
cost = litellm.completion_cost(model="azure/azure-whisper", completion_response=transcription)
|
cost = litellm.completion_cost(
|
||||||
|
model="azure/azure-whisper", completion_response=transcription
|
||||||
|
)
|
||||||
|
|
||||||
print(f"cost: {cost}")
|
print(f"cost: {cost}")
|
||||||
print(f"whisper dict: {litellm.model_cost['whisper-1']}")
|
print(f"whisper dict: {litellm.model_cost['whisper-1']}")
|
||||||
expected_cost = round(
|
expected_cost = round(
|
||||||
litellm.model_cost["whisper-1"]["output_cost_per_second"] * _total_time_in_seconds,
|
litellm.model_cost["whisper-1"]["output_cost_per_second"]
|
||||||
|
* _total_time_in_seconds,
|
||||||
5,
|
5,
|
||||||
)
|
)
|
||||||
assert cost == expected_cost
|
assert cost == expected_cost
|
||||||
|
@ -401,7 +421,9 @@ def test_dalle_3_azure_cost_tracking():
|
||||||
response.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
|
response.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
|
||||||
response._hidden_params = {"model": "dall-e-3", "model_id": None}
|
response._hidden_params = {"model": "dall-e-3", "model_id": None}
|
||||||
print(f"response hidden params: {response._hidden_params}")
|
print(f"response hidden params: {response._hidden_params}")
|
||||||
cost = litellm.completion_cost(completion_response=response, call_type="image_generation")
|
cost = litellm.completion_cost(
|
||||||
|
completion_response=response, call_type="image_generation"
|
||||||
|
)
|
||||||
assert cost > 0
|
assert cost > 0
|
||||||
|
|
||||||
|
|
||||||
|
@ -433,7 +455,9 @@ def test_replicate_llama3_cost_tracking():
|
||||||
model="replicate/meta/meta-llama-3-8b-instruct",
|
model="replicate/meta/meta-llama-3-8b-instruct",
|
||||||
object="chat.completion",
|
object="chat.completion",
|
||||||
system_fingerprint=None,
|
system_fingerprint=None,
|
||||||
usage=litellm.utils.Usage(prompt_tokens=48, completion_tokens=31, total_tokens=79),
|
usage=litellm.utils.Usage(
|
||||||
|
prompt_tokens=48, completion_tokens=31, total_tokens=79
|
||||||
|
),
|
||||||
)
|
)
|
||||||
cost = litellm.completion_cost(
|
cost = litellm.completion_cost(
|
||||||
completion_response=response,
|
completion_response=response,
|
||||||
|
@ -443,8 +467,14 @@ def test_replicate_llama3_cost_tracking():
|
||||||
print(f"cost: {cost}")
|
print(f"cost: {cost}")
|
||||||
cost = round(cost, 5)
|
cost = round(cost, 5)
|
||||||
expected_cost = round(
|
expected_cost = round(
|
||||||
litellm.model_cost["replicate/meta/meta-llama-3-8b-instruct"]["input_cost_per_token"] * 48
|
litellm.model_cost["replicate/meta/meta-llama-3-8b-instruct"][
|
||||||
+ litellm.model_cost["replicate/meta/meta-llama-3-8b-instruct"]["output_cost_per_token"] * 31,
|
"input_cost_per_token"
|
||||||
|
]
|
||||||
|
* 48
|
||||||
|
+ litellm.model_cost["replicate/meta/meta-llama-3-8b-instruct"][
|
||||||
|
"output_cost_per_token"
|
||||||
|
]
|
||||||
|
* 31,
|
||||||
5,
|
5,
|
||||||
)
|
)
|
||||||
assert cost == expected_cost
|
assert cost == expected_cost
|
||||||
|
@ -538,7 +568,9 @@ def test_together_ai_qwen_completion_cost():
|
||||||
"custom_cost_per_second": None,
|
"custom_cost_per_second": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
response = litellm.cost_calculator.get_model_params_and_category(model_name="qwen/Qwen2-72B-Instruct")
|
response = litellm.cost_calculator.get_model_params_and_category(
|
||||||
|
model_name="qwen/Qwen2-72B-Instruct"
|
||||||
|
)
|
||||||
|
|
||||||
assert response == "together-ai-41.1b-80b"
|
assert response == "together-ai-41.1b-80b"
|
||||||
|
|
||||||
|
@ -576,8 +608,12 @@ def test_gemini_completion_cost(above_128k, provider):
|
||||||
), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
|
), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
|
||||||
model_name, model_info
|
model_name, model_info
|
||||||
)
|
)
|
||||||
input_cost = prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
|
input_cost = (
|
||||||
output_cost = output_tokens * model_info["output_cost_per_token_above_128k_tokens"]
|
prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
|
||||||
|
)
|
||||||
|
output_cost = (
|
||||||
|
output_tokens * model_info["output_cost_per_token_above_128k_tokens"]
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
input_cost = prompt_tokens * model_info["input_cost_per_token"]
|
input_cost = prompt_tokens * model_info["input_cost_per_token"]
|
||||||
output_cost = output_tokens * model_info["output_cost_per_token"]
|
output_cost = output_tokens * model_info["output_cost_per_token"]
|
||||||
|
@ -674,3 +710,11 @@ def test_vertex_ai_claude_completion_cost():
|
||||||
)
|
)
|
||||||
predicted_cost = input_tokens * 0.000003 + 0.000015 * output_tokens
|
predicted_cost = input_tokens * 0.000003 + 0.000015 * output_tokens
|
||||||
assert cost == predicted_cost
|
assert cost == predicted_cost
|
||||||
|
|
||||||
|
|
||||||
|
def test_vertex_ai_gemini_predict_cost():
|
||||||
|
model = "gemini-1.5-flash"
|
||||||
|
messages = [{"role": "user", "content": "Hey, hows it going???"}]
|
||||||
|
predictive_cost = completion_cost(model=model, messages=messages)
|
||||||
|
|
||||||
|
assert predictive_cost > 0
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
#### What this tests ####
|
#### What this tests ####
|
||||||
# This tests if prompts are being correctly formatted
|
# This tests if prompts are being correctly formatted
|
||||||
import sys
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
sys.path.insert(0, os.path.abspath("../.."))
|
sys.path.insert(0, os.path.abspath("../.."))
|
||||||
|
@ -10,12 +11,13 @@ sys.path.insert(0, os.path.abspath("../.."))
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
from litellm.llms.prompt_templates.factory import (
|
from litellm.llms.prompt_templates.factory import (
|
||||||
anthropic_pt,
|
_bedrock_tools_pt,
|
||||||
anthropic_messages_pt,
|
anthropic_messages_pt,
|
||||||
|
anthropic_pt,
|
||||||
claude_2_1_pt,
|
claude_2_1_pt,
|
||||||
|
convert_url_to_base64,
|
||||||
llama_2_chat_pt,
|
llama_2_chat_pt,
|
||||||
prompt_factory,
|
prompt_factory,
|
||||||
_bedrock_tools_pt,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -153,3 +155,11 @@ def test_bedrock_tool_calling_pt():
|
||||||
converted_tools = _bedrock_tools_pt(tools=tools)
|
converted_tools = _bedrock_tools_pt(tools=tools)
|
||||||
|
|
||||||
print(converted_tools)
|
print(converted_tools)
|
||||||
|
|
||||||
|
|
||||||
|
def test_convert_url_to_img():
|
||||||
|
response_url = convert_url_to_base64(
|
||||||
|
url="https://images.pexels.com/photos/1319515/pexels-photo-1319515.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert "image/jpeg" in response_url
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue