"""Compare litellm's Bedrock model prices with the AWS Bedrock pricing page.

Run as a script: it scrapes ``PRICING_URL`` for every provider in
``PROVIDERS``, maps each pricing-page model name to a Bedrock model id and
asserts that the per-token costs in ``litellm.model_cost`` match the page.
"""

import functools
import os
import re
import sys

# Must run before ``import litellm`` so the path two levels up is searched first.
sys.path.insert(0, os.path.abspath("../.."))

import litellm
import requests
from bs4 import BeautifulSoup

# URL of the AWS Bedrock Pricing page
PRICING_URL = "https://aws.amazon.com/bedrock/pricing/"

# List of providers to extract pricing for
PROVIDERS = ["ai21", "anthropic", "meta", "cohere", "mistral", "stability", "amazon"]

# Seconds to wait for the pricing page; without a timeout requests can block forever.
REQUEST_TIMEOUT_SECONDS = 30

# Keys used for the per-model price dicts of the non-Amazon providers.
INPUT_PRICE_KEY = "Price per 1,000 input tokens"
OUTPUT_PRICE_KEY = "Price per 1,000 output tokens"

# Captures the size (e.g. "13b") from names shaped like "...chat-(13b)".
_LLAMA2_CHAT_SIZE_RE = re.compile(r"chat-\((\d+b)\)")


def _iter_price_rows(rows):
    """Yield ``(name, input_price, output_price)`` text for each usable row.

    Rows with fewer than three ``<td>`` cells are skipped.
    """
    for row in rows:
        cols = row.find_all("td")
        if len(cols) < 3:
            continue  # Skip rows with insufficient data
        yield tuple(col.get_text(strip=True) for col in cols[:3])


def extract_amazon_pricing(section):
    """
    Extracts pricing data for Amazon-specific models.

    Args:
        section (Tag): The BeautifulSoup Tag object for the Amazon section.

    Returns:
        dict: Maps each tab label (model name) to either
            ``{feature: {<header 2>: price, <header 3>: price}}`` or the
            string ``"Pricing table not found"`` when the tab has no usable
            table (no table, no rows, or fewer than three header cells).
    """
    tabs = section.find_all("li", class_="lb-tabs-trigger")
    panels = section.find_all("li", class_="lb-tabs-content-item")

    amazon_pricing = {}
    for tab, panel in zip(tabs, panels):
        model_name = tab.get_text(strip=True)
        table = panel.find("table")
        rows = table.find_all("tr") if table else []
        if not rows:
            amazon_pricing[model_name] = "Pricing table not found"
            continue

        # Header rows may use <th> instead of <td>; accept both so the column
        # labels line up with the data cells.
        headers = [
            cell.get_text(strip=True) for cell in rows[0].find_all(["th", "td"])
        ]
        if len(headers) < 3:
            # The price columns cannot be labelled without their headers.
            amazon_pricing[model_name] = "Pricing table not found"
            continue

        amazon_pricing[model_name] = {
            feature_name: {headers[1]: input_price, headers[2]: output_price}
            for feature_name, input_price, output_price in _iter_price_rows(rows[1:])
        }

    return amazon_pricing


def _scrape_amazon_pricing(soup):
    """Return the Amazon pricing dict, or an error string if a section is missing."""
    section = soup.find(
        "li",
        class_="lb-tabs-accordion-trigger",
        string=lambda t: t and "Amazon" in t,
    )
    if not section:
        return "Amazon section not found"

    amazon_section = section.find_next("li", class_="lb-tabs-content-item")
    if not amazon_section:
        return "Amazon models section not found"

    return extract_amazon_pricing(amazon_section)


def _scrape_provider_pricing(soup, provider):
    """Return ``{model: {input/output price}}`` for a non-Amazon provider.

    The provider's table is the first ``<table>`` after an ``<h2>`` whose text
    contains the provider name (case-insensitive). Returns an error string
    when the heading or the table cannot be found.
    """
    needle = provider.lower()
    section = soup.find("h2", string=lambda t: t and needle in t.lower())
    if not section:
        return "Provider section not found"

    table = section.find_next("table")
    if not table:
        return "Pricing table not found"

    # The first row is the header row; slicing also copes with an empty table.
    return {
        model_name: {
            INPUT_PRICE_KEY: input_price,
            OUTPUT_PRICE_KEY: output_price,
        }
        for model_name, input_price, output_price in _iter_price_rows(
            table.find_all("tr")[1:]
        )
    }


def get_bedrock_pricing(url, providers):
    """
    Fetches and parses AWS Bedrock pricing for specified providers.

    Args:
        url (str): URL of the AWS Bedrock pricing page.
        providers (list): List of providers to extract pricing for.

    Returns:
        dict: Maps each provider to its pricing dict, or to an error string
            when the expected section or table is missing from the page.

    Raises:
        requests.RequestException: If the page cannot be fetched (including
            a timeout) or the server answers with an error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    pricing_data = {}
    for provider in providers:
        if provider == "amazon":
            pricing_data[provider] = _scrape_amazon_pricing(soup)
        else:
            pricing_data[provider] = _scrape_provider_pricing(soup, provider)
    return pricing_data


model_substring_map = {
    "ai21": {"jurassic-2": "j2"},
    "anthropic": {"claude-2-1": "claude-v2:1", "claude-2-0": "claude-v2"},
    "meta": {"llama-2-chat-(13b)": "llama2-13b-chat"},
    "cohere": {
        "r+": "r-plus",
        "embed-3-english": "embed-english-v3",
        "embed-3-multilingual": "embed-multilingual-v3",
    },
}  # aliases used by bedrock in their real model name vs. pricing page


def _handle_meta_model_name(model_name: str) -> str:
    """Rewrite ``...llama-2-chat-(<size>)`` names to ``meta.llama2-<size>-chat``.

    Any other name is returned unchanged.
    """
    lowered = model_name.lower()
    # Check if it's a Llama 2 chat model
    if "llama-2-chat-" in lowered:
        # Extract the size (e.g., 13b, 70b) between "chat-(" and ")"
        if match := _LLAMA2_CHAT_SIZE_RE.search(lowered):
            size = match.group(1)
            return f"meta.llama2-{size}-chat"
    return model_name


def _handle_cohere_model_name(model_name: str) -> str:
    """Map a name ending in ``command-r`` to ``cohere.command-r-v1``."""
    if model_name.endswith("command-r"):
        return "cohere.command-r-v1"
    return model_name


def _create_bedrock_model_name(provider: str, model_name: str) -> str:
    """Build the Bedrock model id for a pricing-page model name.

    The name is lowercased, spaces and dots become dashes, asterisks are
    dropped, and the result is prefixed with ``<provider>.``. Provider
    specific aliases from ``model_substring_map`` are then applied in order,
    followed by the Meta / Cohere special cases.

    Args:
        provider: Provider name; matched case-insensitively.
        model_name: Model name as shown on the pricing page.

    Returns:
        The derived Bedrock model id.
    """
    # Normalise once so alias lookup agrees with the lowercased prefix.
    provider_key = provider.lower()
    slug = model_name.replace(" ", "-").replace(".", "-").replace("*", "").lower()
    complete_model_name = f"{provider_key}.{slug}"

    aliases = model_substring_map.get(provider_key, {})
    for model_substring, replacement in aliases.items():
        print(
            f"model_substring: {model_substring}, replacement: {replacement}, received model_name: {model_name}"
        )
        if model_substring in complete_model_name:
            print(f"model_name: {complete_model_name}")
            complete_model_name = complete_model_name.replace(
                model_substring, replacement
            )
            print(f"model_name: {complete_model_name}")

    if provider_key == "meta":
        complete_model_name = _handle_meta_model_name(complete_model_name)
    if provider_key == "cohere":
        complete_model_name = _handle_cohere_model_name(complete_model_name)
    return complete_model_name


def _convert_str_to_float(price_str: str) -> float:
    """Parse a dollar amount such as ``"$0.003"`` or ``"$1,000.50"``.

    Strings without a ``$`` (e.g. ``"N/A"``) are treated as "no price" and
    yield ``0.0``.

    Raises:
        ValueError: If the text left after removing ``$`` and thousands
            separators is not a number.
    """
    if "$" not in price_str:
        return 0.0
    return float(price_str.replace("$", "").replace(",", "").strip())


@functools.lru_cache(maxsize=None)
def _load_local_model_cost() -> dict:
    """Load litellm's model cost map once and install it as ``litellm.model_cost``.

    ``LITELLM_LOCAL_MODEL_COST_MAP`` is set before loading; presumably this
    makes litellm use its bundled copy rather than a remote one - confirm
    against litellm's documentation.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")
    return litellm.model_cost


def _check_if_model_name_in_pricing(
    bedrock_model_name: str,
    input_cost_per_1k_tokens: str,
    output_cost_per_1k_tokens: str,
) -> bool:
    """Check the first litellm model whose name starts with ``bedrock_model_name``.

    Args:
        bedrock_model_name: Model id prefix to look for in the cost map.
        input_cost_per_1k_tokens: Scraped input price text per 1,000 tokens.
        output_cost_per_1k_tokens: Scraped output price text per 1,000 tokens.

    Returns:
        True if a matching model exists and its costs agree with the scraped
        prices (compared after rounding to 10 decimals); False if no model
        name starts with ``bedrock_model_name``.

    Raises:
        AssertionError: If the matching model's input or output cost differs.
    """
    for model, value in _load_local_model_cost().items():
        if not model.startswith(bedrock_model_name):
            continue

        input_cost_per_token = _convert_str_to_float(input_cost_per_1k_tokens) / 1000
        output_cost_per_token = _convert_str_to_float(output_cost_per_1k_tokens) / 1000
        assert round(value["input_cost_per_token"], 10) == round(
            input_cost_per_token, 10
        ), f"Invalid input cost per token for {model} \n Bedrock pricing page name={bedrock_model_name} \n Got={value['input_cost_per_token']}, Expected={input_cost_per_token}"
        assert round(value["output_cost_per_token"], 10) == round(
            output_cost_per_token, 10
        ), f"Invalid output cost per token for {model} \n Bedrock pricing page name={bedrock_model_name} \n Got={value['output_cost_per_token']}, Expected={output_cost_per_token}"
        return True
    return False


def _has_token_prices(details) -> bool:
    """Return True if ``details`` carries both per-1,000-token price keys."""
    return (
        isinstance(details, dict)
        and INPUT_PRICE_KEY in details
        and OUTPUT_PRICE_KEY in details
    )


def main() -> None:
    """Scrape the pricing page, validate litellm's costs and print the results."""
    try:
        pricing = get_bedrock_pricing(PRICING_URL, PROVIDERS)
        print("AWS Bedrock On-Demand Pricing:")
        for provider, data in pricing.items():
            print(f"\n{provider.capitalize()}:")
            if not isinstance(data, dict):
                print(f" {data}")
                continue

            for model, details in data.items():
                complete_model_name = _create_bedrock_model_name(provider, model)
                print(f"details: {details}")
                if _has_token_prices(details):
                    assert _check_if_model_name_in_pricing(
                        bedrock_model_name=complete_model_name,
                        input_cost_per_1k_tokens=details[INPUT_PRICE_KEY],
                        output_cost_per_1k_tokens=details[OUTPUT_PRICE_KEY],
                    ), f"Model {complete_model_name} not found in litellm.model_cost"
                else:
                    # Amazon entries are nested per feature (or an error
                    # string) and have no flat per-token prices to compare.
                    print(
                        f" Skipping litellm cost check for {complete_model_name}: no per-token prices scraped"
                    )

                print(f" {complete_model_name}:")
                if isinstance(details, dict):
                    for detail, value in details.items():
                        print(f" {detail}: {value}")
                else:
                    print(f" {details}")
    except requests.RequestException as e:
        print(f"Error fetching pricing data: {e}")


if __name__ == "__main__":
    main()