mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00
Fix bedrock model pricing + add unit test using bedrock pricing api (#7978)
* test(test_completion_cost.py): add unit testing to ensure all bedrock models with region name have cost tracked
* feat: initial script to get bedrock pricing from amazon api — ensures bedrock pricing is accurate
* build(model_prices_and_context_window.json): correct bedrock model prices based on api check — ensures accurate bedrock pricing
* ci(config.yml): add bedrock pricing check to ci/cd — ensures litellm always maintains up-to-date pricing for bedrock models
* ci(config.yml): add beautifulsoup to ci/cd
* test: bump groq model
* test: fix test
This commit is contained in:
parent
8eaa5dc797
commit
9c20c69915
6 changed files with 356 additions and 24 deletions
|
@ -986,6 +986,7 @@ jobs:
|
|||
pip install ruff
|
||||
pip install pylint
|
||||
pip install pyright
|
||||
pip install beautifulsoup4
|
||||
pip install .
|
||||
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||
- run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
|
||||
|
@ -995,6 +996,7 @@ jobs:
|
|||
- run: python ./tests/code_coverage_tests/recursive_detector.py
|
||||
- run: python ./tests/code_coverage_tests/test_router_strategy_async.py
|
||||
- run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py
|
||||
- run: python ./tests/code_coverage_tests/bedrock_pricing.py
|
||||
- run: python ./tests/documentation_tests/test_env_keys.py
|
||||
- run: python ./tests/documentation_tests/test_router_settings.py
|
||||
- run: python ./tests/documentation_tests/test_api_docs.py
|
||||
|
|
|
@ -434,7 +434,6 @@ BEDROCK_CONVERSE_MODELS = [
|
|||
"meta.llama3-2-3b-instruct-v1:0",
|
||||
"meta.llama3-2-11b-instruct-v1:0",
|
||||
"meta.llama3-2-90b-instruct-v1:0",
|
||||
"meta.llama3-2-405b-instruct-v1:0",
|
||||
]
|
||||
####### COMPLETION MODELS ###################
|
||||
open_ai_chat_completion_models: List = []
|
||||
|
@ -1254,7 +1253,7 @@ from .proxy.proxy_cli import run_server
|
|||
from .router import Router
|
||||
from .assistants.main import *
|
||||
from .batches.main import *
|
||||
from .batch_completion.main import *
|
||||
from .batch_completion.main import * # type: ignore
|
||||
from .rerank_api.main import *
|
||||
from .realtime_api.main import _arealtime
|
||||
from .fine_tuning.main import *
|
||||
|
|
|
@ -5224,6 +5224,24 @@
|
|||
"mode": "chat",
|
||||
"supports_system_messages": true
|
||||
},
|
||||
"ai21.jamba-1-5-large-v1:0": {
|
||||
"max_tokens": 256000,
|
||||
"max_input_tokens": 256000,
|
||||
"max_output_tokens": 256000,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000008,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat"
|
||||
},
|
||||
"ai21.jamba-1-5-mini-v1:0": {
|
||||
"max_tokens": 256000,
|
||||
"max_input_tokens": 256000,
|
||||
"max_output_tokens": 256000,
|
||||
"input_cost_per_token": 0.0000002,
|
||||
"output_cost_per_token": 0.0000004,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat"
|
||||
},
|
||||
"amazon.titan-text-lite-v1": {
|
||||
"max_tokens": 4000,
|
||||
"max_input_tokens": 42000,
|
||||
|
@ -5542,8 +5560,8 @@
|
|||
"max_tokens": 8192,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000005,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.000004,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat",
|
||||
"supports_assistant_prefill": true,
|
||||
|
@ -5612,8 +5630,8 @@
|
|||
"max_tokens": 8192,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000005,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.000004,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat",
|
||||
"supports_assistant_prefill": true,
|
||||
|
@ -5682,8 +5700,8 @@
|
|||
"max_tokens": 8192,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000005,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.00000125,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
|
@ -6057,8 +6075,8 @@
|
|||
"max_tokens": 8191,
|
||||
"max_input_tokens": 100000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.00000163,
|
||||
"output_cost_per_token": 0.00000551,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.0000024,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat"
|
||||
},
|
||||
|
|
|
@ -5224,6 +5224,24 @@
|
|||
"mode": "chat",
|
||||
"supports_system_messages": true
|
||||
},
|
||||
"ai21.jamba-1-5-large-v1:0": {
|
||||
"max_tokens": 256000,
|
||||
"max_input_tokens": 256000,
|
||||
"max_output_tokens": 256000,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000008,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat"
|
||||
},
|
||||
"ai21.jamba-1-5-mini-v1:0": {
|
||||
"max_tokens": 256000,
|
||||
"max_input_tokens": 256000,
|
||||
"max_output_tokens": 256000,
|
||||
"input_cost_per_token": 0.0000002,
|
||||
"output_cost_per_token": 0.0000004,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat"
|
||||
},
|
||||
"amazon.titan-text-lite-v1": {
|
||||
"max_tokens": 4000,
|
||||
"max_input_tokens": 42000,
|
||||
|
@ -5542,8 +5560,8 @@
|
|||
"max_tokens": 8192,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000005,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.000004,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat",
|
||||
"supports_assistant_prefill": true,
|
||||
|
@ -5612,8 +5630,8 @@
|
|||
"max_tokens": 8192,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000005,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.000004,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat",
|
||||
"supports_assistant_prefill": true,
|
||||
|
@ -5682,8 +5700,8 @@
|
|||
"max_tokens": 8192,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000005,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.00000125,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
|
@ -6057,8 +6075,8 @@
|
|||
"max_tokens": 8191,
|
||||
"max_input_tokens": 100000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.00000163,
|
||||
"output_cost_per_token": 0.00000551,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.0000024,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat"
|
||||
},
|
||||
|
|
243
tests/code_coverage_tests/bedrock_pricing.py
Normal file
243
tests/code_coverage_tests/bedrock_pricing.py
Normal file
|
@ -0,0 +1,243 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
import litellm
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# URL of the AWS Bedrock Pricing page
|
||||
PRICING_URL = "https://aws.amazon.com/bedrock/pricing/"
|
||||
|
||||
# List of providers to extract pricing for
|
||||
PROVIDERS = ["ai21", "anthropic", "meta", "cohere", "mistral", "stability", "amazon"]
|
||||
|
||||
|
||||
def extract_amazon_pricing(section):
    """
    Extracts pricing data for Amazon-specific models.

    Args:
        section (Tag): The BeautifulSoup Tag object for the Amazon section.

    Returns:
        dict: Mapping of model name -> pricing dict, or an explanatory
        string when no usable pricing table is found for a model.
    """
    tabs = section.find_all("li", class_="lb-tabs-trigger")
    panels = section.find_all("li", class_="lb-tabs-content-item")

    amazon_pricing = {}

    for tab, panel in zip(tabs, panels):
        model_name = tab.get_text(strip=True)
        table = panel.find("table")
        if not table:
            amazon_pricing[model_name] = "Pricing table not found"
            continue

        # Parse the table
        rows = table.find_all("tr")
        # Guard: an empty table (no rows) or a header row with fewer than
        # three columns previously raised IndexError on rows[0]/headers[1:3].
        if not rows:
            amazon_pricing[model_name] = "Pricing table not found"
            continue
        headers = [header.get_text(strip=True) for header in rows[0].find_all("td")]
        if len(headers) < 3:
            amazon_pricing[model_name] = "Pricing table not found"
            continue
        model_pricing = {}

        for row in rows[1:]:
            cols = row.find_all("td")
            if len(cols) < 3:
                continue  # Skip rows with insufficient data

            feature_name = cols[0].get_text(strip=True)
            input_price = cols[1].get_text(strip=True)
            output_price = cols[2].get_text(strip=True)
            model_pricing[feature_name] = {
                headers[1]: input_price,
                headers[2]: output_price,
            }

        amazon_pricing[model_name] = model_pricing

    return amazon_pricing
|
||||
|
||||
|
||||
def get_bedrock_pricing(url, providers):
    """
    Fetches and parses AWS Bedrock pricing for specified providers.

    Args:
        url (str): URL of the AWS Bedrock pricing page.
        providers (list): List of provider names to extract pricing for.

    Returns:
        dict: provider -> either a {model: prices} dict, or an explanatory
        string when the expected page structure was not found.

    Raises:
        requests.RequestException: on network failure or non-2xx response.
    """
    # An explicit timeout prevents the CI job from hanging indefinitely
    # if the AWS page stalls (requests has no default timeout).
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    pricing_data = {}

    for provider in providers:
        if provider == "amazon":
            # Amazon's own models live in a tabbed accordion rather than the
            # plain <h2> + <table> layout used by the other providers.
            section = soup.find(
                "li",
                class_="lb-tabs-accordion-trigger",
                text=lambda t: t and "Amazon" in t,
            )
            if not section:
                pricing_data[provider] = "Amazon section not found"
                continue

            amazon_section = section.find_next("li", class_="lb-tabs-content-item")
            if not amazon_section:
                pricing_data[provider] = "Amazon models section not found"
                continue

            pricing_data[provider] = extract_amazon_pricing(amazon_section)
        else:
            # General logic for other providers: locate the provider heading,
            # then take the first pricing table that follows it.
            section = soup.find(
                "h2", text=lambda t: t and provider.lower() in t.lower()
            )
            if not section:
                pricing_data[provider] = "Provider section not found"
                continue

            table = section.find_next("table")
            if not table:
                pricing_data[provider] = "Pricing table not found"
                continue

            rows = table.find_all("tr")
            # Guard: an empty table would make rows[1:] meaningless and the
            # original headers lookup on rows[0] raised IndexError. (The
            # original also computed a `headers` list here that was never
            # used — the output keys below are fixed strings — so it is
            # removed.)
            if not rows:
                pricing_data[provider] = "Pricing table not found"
                continue
            provider_pricing = {}

            for row in rows[1:]:
                cols = row.find_all("td")
                if len(cols) < 3:
                    continue  # skip header/spacer rows

                model_name = cols[0].get_text(strip=True)
                input_price = cols[1].get_text(strip=True)
                output_price = cols[2].get_text(strip=True)
                provider_pricing[model_name] = {
                    "Price per 1,000 input tokens": input_price,
                    "Price per 1,000 output tokens": output_price,
                }

            pricing_data[provider] = provider_pricing

    return pricing_data
|
||||
|
||||
|
||||
# Aliases between the model names shown on the AWS Bedrock pricing page and
# the identifiers Bedrock actually uses, keyed by provider. Each inner dict
# maps a substring of the constructed pricing-page name to its replacement
# in the real Bedrock model id (applied in _create_bedrock_model_name).
model_substring_map = {
    "ai21": {"jurassic-2": "j2"},
    "anthropic": {"claude-2-1": "claude-v2:1", "claude-2-0": "claude-v2"},
    "meta": {"llama-2-chat-(13b)": "llama2-13b-chat"},
    "cohere": {
        "r+": "r-plus",
        "embed-3-english": "embed-english-v3",
        "embed-3-multilingual": "embed-multilingual-v3",
    },
}  # aliases used by bedrock in their real model name vs. pricing page
|
||||
|
||||
|
||||
def _handle_meta_model_name(model_name: str) -> str:
|
||||
# Check if it's a Llama 2 chat model
|
||||
if "llama-2-chat-" in model_name.lower():
|
||||
# Extract the size (e.g., 13b, 70b) using string manipulation
|
||||
# Look for pattern between "chat-(" and ")"
|
||||
import re
|
||||
|
||||
if match := re.search(r"chat-\((\d+b)\)", model_name.lower()):
|
||||
size = match.group(1)
|
||||
return f"meta.llama2-{size}-chat"
|
||||
return model_name
|
||||
|
||||
|
||||
def _handle_cohere_model_name(model_name: str) -> str:
|
||||
if model_name.endswith("command-r"):
|
||||
return "cohere.command-r-v1"
|
||||
return model_name
|
||||
|
||||
|
||||
def _create_bedrock_model_name(provider: str, model_name: str):
    """Build the canonical Bedrock model id for a pricing-page entry.

    Normalizes the pricing-page display name (spaces/dots -> dashes, asterisks
    stripped, lowercased) and prefixes the provider, then applies the
    provider-specific aliases from ``model_substring_map`` and the meta /
    cohere special cases so the result matches litellm's cost-map keys.

    Args:
        provider: Provider key as used on the pricing page, e.g. "anthropic".
        model_name: Model display name from the pricing page, e.g. "Claude 2.1".

    Returns:
        str: The normalized Bedrock model identifier.
    """
    complete_model_name = f"{provider.lower()}.{model_name.replace(' ', '-').replace('.', '-').replace('*', '').lower()}"
    # Direct dict lookup instead of scanning every provider's alias map;
    # this also stops shadowing the builtin `map` as a loop variable, and
    # drops the leftover debug prints that flooded the CI log.
    for model_substring, replacement in model_substring_map.get(provider, {}).items():
        if model_substring in complete_model_name:
            complete_model_name = complete_model_name.replace(
                model_substring, replacement
            )
    if provider == "meta":
        complete_model_name = _handle_meta_model_name(complete_model_name)
    if provider == "cohere":
        complete_model_name = _handle_cohere_model_name(complete_model_name)
    return complete_model_name
|
||||
|
||||
|
||||
def _convert_str_to_float(price_str: str) -> float:
|
||||
if "$" not in price_str:
|
||||
return 0.0
|
||||
return float(price_str.replace("$", ""))
|
||||
|
||||
|
||||
def _check_if_model_name_in_pricing(
    bedrock_model_name: str,
    input_cost_per_1k_tokens: str,
    output_cost_per_1k_tokens: str,
) -> bool:
    """
    Verify litellm's local cost map agrees with the scraped Bedrock price.

    Args:
        bedrock_model_name: Canonical Bedrock model id; prefix-matched
            against litellm.model_cost keys, so region-suffixed entries
            (e.g. "...us-east-1/...") also match.
        input_cost_per_1k_tokens: Price string from the pricing page, e.g. "$0.0008".
        output_cost_per_1k_tokens: Price string from the pricing page.

    Returns:
        True if at least one cost-map entry matched (and its prices agreed);
        False if no entry starts with ``bedrock_model_name``.

    Raises:
        AssertionError: when a matching entry's per-token cost differs from
            the scraped per-1k price divided by 1000.
    """
    # Force litellm to load the local (in-repo) cost map instead of fetching
    # the published one, so the check runs against this commit's prices.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    for model, value in litellm.model_cost.items():
        if model.startswith(bedrock_model_name):
            # Pricing page quotes per 1,000 tokens; cost map is per token.
            input_cost_per_token = (
                _convert_str_to_float(input_cost_per_1k_tokens) / 1000
            )
            output_cost_per_token = (
                _convert_str_to_float(output_cost_per_1k_tokens) / 1000
            )
            # round(..., 10) sidesteps float-representation noise from the
            # division above.
            assert round(value["input_cost_per_token"], 10) == round(
                input_cost_per_token, 10
            ), f"Invalid input cost per token for {model} \n Bedrock pricing page name={bedrock_model_name} \n Got={value['input_cost_per_token']}, Expected={input_cost_per_token}"
            assert round(value["output_cost_per_token"], 10) == round(
                output_cost_per_token, 10
            ), f"Invalid output cost per token for {model} \n Bedrock pricing page name={bedrock_model_name} \n Got={value['output_cost_per_token']}, Expected={output_cost_per_token}"
            # NOTE(review): returns after the first matching entry — later
            # entries sharing this prefix are not checked in this call.
            return True
    return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: scrape the Bedrock pricing page and assert every
    # scraped model/price pair matches litellm's local cost map. Intended to
    # run in CI; a failed assertion fails the job.
    try:
        pricing = get_bedrock_pricing(PRICING_URL, PROVIDERS)
        print("AWS Bedrock On-Demand Pricing:")
        for provider, data in pricing.items():
            print(f"\n{provider.capitalize()}:")
            # A str value here is an error marker from get_bedrock_pricing
            # (e.g. "Provider section not found"), not pricing data.
            if isinstance(data, dict):
                for model, details in data.items():
                    complete_model_name = _create_bedrock_model_name(provider, model)
                    print(f"details: {details}")
                    # NOTE(review): assumes details always carries the
                    # "Price per 1,000 ..." keys — Amazon entries built by
                    # extract_amazon_pricing may use the page's own header
                    # text instead; confirm against the scraped structure.
                    assert _check_if_model_name_in_pricing(
                        bedrock_model_name=complete_model_name,
                        input_cost_per_1k_tokens=details[
                            "Price per 1,000 input tokens"
                        ],
                        output_cost_per_1k_tokens=details[
                            "Price per 1,000 output tokens"
                        ],
                    ), f"Model {complete_model_name} not found in litellm.model_cost"
                    print(f" {complete_model_name}:")
                    if isinstance(details, dict):
                        for detail, value in details.items():
                            print(f" {detail}: {value}")
                    else:
                        print(f" {details}")
            else:
                print(f" {data}")
    except requests.RequestException as e:
        # Network / HTTP failure fetching the pricing page.
        print(f"Error fetching pricing data: {e}")
|
|
@ -2766,12 +2766,64 @@ def test_add_known_models():
|
|||
def test_bedrock_cost_calc_with_region():
|
||||
from litellm import completion
|
||||
|
||||
response = completion(
|
||||
model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
|
||||
messages=[{"role": "user", "content": "Hello, how are you?"}],
|
||||
aws_region_name="us-east-1",
|
||||
)
|
||||
assert response._hidden_params["response_cost"] > 0
|
||||
from litellm import ModelResponse
|
||||
|
||||
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
|
||||
litellm.model_cost = litellm.get_model_cost_map(url="")
|
||||
|
||||
hidden_params = {
|
||||
"custom_llm_provider": "bedrock",
|
||||
"region_name": "us-east-1",
|
||||
"optional_params": {},
|
||||
"litellm_call_id": "cf371a5d-679b-410f-b862-8084676d6d59",
|
||||
"model_id": None,
|
||||
"api_base": None,
|
||||
"response_cost": 0.0005639999999999999,
|
||||
"additional_headers": {},
|
||||
}
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
bedrock_models = litellm.bedrock_models + litellm.bedrock_converse_models
|
||||
|
||||
for model in bedrock_models:
|
||||
if litellm.model_cost[model]["mode"] == "chat":
|
||||
response = {
|
||||
"id": "cmpl-55db75e0b05344058b0bd8ee4e00bf84",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": None,
|
||||
"message": {
|
||||
"content": 'Here\'s one:\n\nWhy did the Linux kernel go to therapy?\n\nBecause it had a lot of "core" issues!\n\nHope that one made you laugh!',
|
||||
"refusal": None,
|
||||
"role": "assistant",
|
||||
"audio": None,
|
||||
"function_call": None,
|
||||
"tool_calls": [],
|
||||
},
|
||||
}
|
||||
],
|
||||
"created": 1729243714,
|
||||
"model": model,
|
||||
"object": "chat.completion",
|
||||
"service_tier": None,
|
||||
"system_fingerprint": None,
|
||||
"usage": {
|
||||
"completion_tokens": 32,
|
||||
"prompt_tokens": 16,
|
||||
"total_tokens": 48,
|
||||
"completion_tokens_details": None,
|
||||
"prompt_tokens_details": None,
|
||||
},
|
||||
}
|
||||
|
||||
model_response = ModelResponse(**response)
|
||||
model_response._hidden_params = hidden_params
|
||||
cost = completion_cost(model_response, custom_llm_provider="bedrock")
|
||||
|
||||
assert cost > 0
|
||||
|
||||
|
||||
# @pytest.mark.parametrize(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue