diff --git a/.circleci/config.yml b/.circleci/config.yml
index acf8612eac..1a3c3b248e 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -986,6 +986,7 @@ jobs:
             pip install ruff
             pip install pylint
             pip install pyright
+            pip install beautifulsoup4
             pip install .
             curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
       - run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
@@ -995,6 +996,7 @@
       - run: python ./tests/code_coverage_tests/recursive_detector.py
       - run: python ./tests/code_coverage_tests/test_router_strategy_async.py
       - run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py
+      - run: python ./tests/code_coverage_tests/bedrock_pricing.py
       - run: python ./tests/documentation_tests/test_env_keys.py
       - run: python ./tests/documentation_tests/test_router_settings.py
       - run: python ./tests/documentation_tests/test_api_docs.py
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 814e04d741..9cd5cb8c4a 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -434,7 +434,6 @@ BEDROCK_CONVERSE_MODELS = [
     "meta.llama3-2-3b-instruct-v1:0",
     "meta.llama3-2-11b-instruct-v1:0",
     "meta.llama3-2-90b-instruct-v1:0",
-    "meta.llama3-2-405b-instruct-v1:0",
 ]
 ####### COMPLETION MODELS ###################
 open_ai_chat_completion_models: List = []
@@ -1254,7 +1253,7 @@ from .proxy.proxy_cli import run_server
 from .router import Router
 from .assistants.main import *
 from .batches.main import *
-from .batch_completion.main import *
+from .batch_completion.main import *  # type: ignore
 from .rerank_api.main import *
 from .realtime_api.main import _arealtime
 from .fine_tuning.main import *
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index beea141d24..55a50e04dd 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -5224,6 +5224,24 @@
         "mode": "chat",
         "supports_system_messages": true
     },
+    "ai21.jamba-1-5-large-v1:0": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.000002,
+        "output_cost_per_token": 0.000008,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "ai21.jamba-1-5-mini-v1:0": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.0000002,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "amazon.titan-text-lite-v1": {
         "max_tokens": 4000,
         "max_input_tokens": 42000,
@@ -5542,8 +5560,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.000004,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_assistant_prefill": true,
@@ -5612,8 +5630,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.000004,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_assistant_prefill": true,
@@ -5682,8 +5700,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.00000125,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
@@ -6057,8 +6075,8 @@
         "max_tokens": 8191,
         "max_input_tokens": 100000,
         "max_output_tokens": 8191,
-        "input_cost_per_token": 0.00000163,
-        "output_cost_per_token": 0.00000551,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.0000024,
         "litellm_provider": "bedrock",
         "mode": "chat"
     },
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index beea141d24..55a50e04dd 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -5224,6 +5224,24 @@
         "mode": "chat",
         "supports_system_messages": true
     },
+    "ai21.jamba-1-5-large-v1:0": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.000002,
+        "output_cost_per_token": 0.000008,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "ai21.jamba-1-5-mini-v1:0": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.0000002,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "amazon.titan-text-lite-v1": {
         "max_tokens": 4000,
         "max_input_tokens": 42000,
@@ -5542,8 +5560,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.000004,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_assistant_prefill": true,
@@ -5612,8 +5630,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.000004,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_assistant_prefill": true,
@@ -5682,8 +5700,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.00000125,
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
@@ -6057,8 +6075,8 @@
         "max_tokens": 8191,
         "max_input_tokens": 100000,
         "max_output_tokens": 8191,
-        "input_cost_per_token": 0.00000163,
-        "output_cost_per_token": 0.00000551,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.0000024,
         "litellm_provider": "bedrock",
         "mode": "chat"
     },
diff --git a/tests/code_coverage_tests/bedrock_pricing.py b/tests/code_coverage_tests/bedrock_pricing.py
new file mode 100644
index 0000000000..b2c9e78b06
--- /dev/null
+++ b/tests/code_coverage_tests/bedrock_pricing.py
@@ -0,0 +1,243 @@
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath("../.."))
+import litellm
+import requests
+from bs4 import BeautifulSoup
+
+# URL of the AWS Bedrock Pricing page
+PRICING_URL = "https://aws.amazon.com/bedrock/pricing/"
+
+# List of providers to extract pricing for
+PROVIDERS = ["ai21", "anthropic", "meta", "cohere", "mistral", "stability", "amazon"]
+
+
+def extract_amazon_pricing(section):
+    """
+    Extracts pricing data for Amazon-specific models.
+
+    Args:
+        section (Tag): The BeautifulSoup Tag object for the Amazon section.
+
+    Returns:
+        dict: Pricing data for Amazon models.
+    """
+    tabs = section.find_all("li", class_="lb-tabs-trigger")
+    panels = section.find_all("li", class_="lb-tabs-content-item")
+
+    amazon_pricing = {}
+
+    for tab, panel in zip(tabs, panels):
+        model_name = tab.get_text(strip=True)
+        table = panel.find("table")
+        if not table:
+            amazon_pricing[model_name] = "Pricing table not found"
+            continue
+
+        # Parse the table
+        rows = table.find_all("tr")
+        headers = [header.get_text(strip=True) for header in rows[0].find_all("td")]
+        model_pricing = {}
+
+        for row in rows[1:]:
+            cols = row.find_all("td")
+            if len(cols) < 3:
+                continue  # Skip rows with insufficient data
+
+            feature_name = cols[0].get_text(strip=True)
+            input_price = cols[1].get_text(strip=True)
+            output_price = cols[2].get_text(strip=True)
+            model_pricing[feature_name] = {
+                headers[1]: input_price,
+                headers[2]: output_price,
+            }
+
+        amazon_pricing[model_name] = model_pricing
+
+    return amazon_pricing
+
+
+def get_bedrock_pricing(url, providers):
+    """
+    Fetches and parses AWS Bedrock pricing for specified providers.
+
+    Args:
+        url (str): URL of the AWS Bedrock pricing page.
+        providers (list): List of providers to extract pricing for.
+
+    Returns:
+        dict: A dictionary containing pricing data for the providers.
+    """
+    response = requests.get(url)
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, "html.parser")
+
+    pricing_data = {}
+
+    for provider in providers:
+        if provider == "amazon":
+            section = soup.find(
+                "li",
+                class_="lb-tabs-accordion-trigger",
+                text=lambda t: t and "Amazon" in t,
+            )
+            if not section:
+                pricing_data[provider] = "Amazon section not found"
+                continue
+
+            amazon_section = section.find_next("li", class_="lb-tabs-content-item")
+            if not amazon_section:
+                pricing_data[provider] = "Amazon models section not found"
+                continue
+
+            pricing_data[provider] = extract_amazon_pricing(amazon_section)
+        else:
+            # General logic for other providers
+            section = soup.find(
+                "h2", text=lambda t: t and provider.lower() in t.lower()
+            )
+            if not section:
+                pricing_data[provider] = "Provider section not found"
+                continue
+
+            table = section.find_next("table")
+            if not table:
+                pricing_data[provider] = "Pricing table not found"
+                continue
+
+            rows = table.find_all("tr")
+            headers = [header.get_text(strip=True) for header in rows[0].find_all("td")]
+            provider_pricing = {}
+
+            for row in rows[1:]:
+                cols = row.find_all("td")
+                if len(cols) < 3:
+                    continue
+
+                model_name = cols[0].get_text(strip=True)
+                input_price = cols[1].get_text(strip=True)
+                output_price = cols[2].get_text(strip=True)
+                provider_pricing[model_name] = {
+                    "Price per 1,000 input tokens": input_price,
+                    "Price per 1,000 output tokens": output_price,
+                }
+
+            pricing_data[provider] = provider_pricing
+
+    return pricing_data
+
+
+model_substring_map = {
+    "ai21": {"jurassic-2": "j2"},
+    "anthropic": {"claude-2-1": "claude-v2:1", "claude-2-0": "claude-v2"},
+    "meta": {"llama-2-chat-(13b)": "llama2-13b-chat"},
+    "cohere": {
+        "r+": "r-plus",
+        "embed-3-english": "embed-english-v3",
+        "embed-3-multilingual": "embed-multilingual-v3",
+    },
+}  # aliases used by bedrock in their real model name vs. pricing page
+
+
+def _handle_meta_model_name(model_name: str) -> str:
+    # Check if it's a Llama 2 chat model
+    if "llama-2-chat-" in model_name.lower():
+        # Extract the size (e.g., 13b, 70b) using string manipulation
+        # Look for pattern between "chat-(" and ")"
+        import re
+
+        if match := re.search(r"chat-\((\d+b)\)", model_name.lower()):
+            size = match.group(1)
+            return f"meta.llama2-{size}-chat"
+    return model_name
+
+
+def _handle_cohere_model_name(model_name: str) -> str:
+    if model_name.endswith("command-r"):
+        return "cohere.command-r-v1"
+    return model_name
+
+
+def _create_bedrock_model_name(provider: str, model_name: str):
+    complete_model_name = f"{provider.lower()}.{model_name.replace(' ', '-').replace('.', '-').replace('*', '').lower()}"
+    for provider_key, map in model_substring_map.items():
+        if provider_key == provider:
+            for model_substring, replacement in map.items():
+                print(
+                    f"model_substring: {model_substring}, replacement: {replacement}, received model_name: {model_name}"
+                )
+                if model_substring in complete_model_name:
+                    print(f"model_name: {complete_model_name}")
+                    complete_model_name = complete_model_name.replace(
+                        model_substring, replacement
+                    )
+                    print(f"model_name: {complete_model_name}")
+    if provider == "meta":
+        complete_model_name = _handle_meta_model_name(complete_model_name)
+    if provider == "cohere":
+        complete_model_name = _handle_cohere_model_name(complete_model_name)
+    return complete_model_name
+
+
+def _convert_str_to_float(price_str: str) -> float:
+    if "$" not in price_str:
+        return 0.0
+    return float(price_str.replace("$", ""))
+
+
+def _check_if_model_name_in_pricing(
+    bedrock_model_name: str,
+    input_cost_per_1k_tokens: str,
+    output_cost_per_1k_tokens: str,
+):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    for model, value in litellm.model_cost.items():
+        if model.startswith(bedrock_model_name):
+            input_cost_per_token = (
+                _convert_str_to_float(input_cost_per_1k_tokens) / 1000
+            )
+            output_cost_per_token = (
+                _convert_str_to_float(output_cost_per_1k_tokens) / 1000
+            )
+            assert round(value["input_cost_per_token"], 10) == round(
+                input_cost_per_token, 10
+            ), f"Invalid input cost per token for {model} \n Bedrock pricing page name={bedrock_model_name} \n Got={value['input_cost_per_token']}, Expected={input_cost_per_token}"
+            assert round(value["output_cost_per_token"], 10) == round(
+                output_cost_per_token, 10
+            ), f"Invalid output cost per token for {model} \n Bedrock pricing page name={bedrock_model_name} \n Got={value['output_cost_per_token']}, Expected={output_cost_per_token}"
+            return True
+    return False
+
+
+if __name__ == "__main__":
+    try:
+        pricing = get_bedrock_pricing(PRICING_URL, PROVIDERS)
+        print("AWS Bedrock On-Demand Pricing:")
+        for provider, data in pricing.items():
+            print(f"\n{provider.capitalize()}:")
+            if isinstance(data, dict):
+                for model, details in data.items():
+                    complete_model_name = _create_bedrock_model_name(provider, model)
+                    print(f"details: {details}")
+                    assert _check_if_model_name_in_pricing(
+                        bedrock_model_name=complete_model_name,
+                        input_cost_per_1k_tokens=details[
+                            "Price per 1,000 input tokens"
+                        ],
+                        output_cost_per_1k_tokens=details[
+                            "Price per 1,000 output tokens"
+                        ],
+                    ), f"Model {complete_model_name} not found in litellm.model_cost"
+                    print(f"  {complete_model_name}:")
+                    if isinstance(details, dict):
+                        for detail, value in details.items():
+                            print(f"    {detail}: {value}")
+                    else:
+                        print(f"    {details}")
+            else:
+                print(f"  {data}")
+    except requests.RequestException as e:
+        print(f"Error fetching pricing data: {e}")
diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py
index 16366ec641..8576a00d30 100644
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@@ -2766,12 +2766,64 @@ def test_add_known_models():
 def test_bedrock_cost_calc_with_region():
     from litellm import completion
 
-    response = completion(
-        model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
-        messages=[{"role": "user", "content": "Hello, how are you?"}],
-        aws_region_name="us-east-1",
-    )
-    assert response._hidden_params["response_cost"] > 0
+    from litellm import ModelResponse
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    hidden_params = {
+        "custom_llm_provider": "bedrock",
+        "region_name": "us-east-1",
+        "optional_params": {},
+        "litellm_call_id": "cf371a5d-679b-410f-b862-8084676d6d59",
+        "model_id": None,
+        "api_base": None,
+        "response_cost": 0.0005639999999999999,
+        "additional_headers": {},
+    }
+
+    litellm.set_verbose = True
+
+    bedrock_models = litellm.bedrock_models + litellm.bedrock_converse_models
+
+    for model in bedrock_models:
+        if litellm.model_cost[model]["mode"] == "chat":
+            response = {
+                "id": "cmpl-55db75e0b05344058b0bd8ee4e00bf84",
+                "choices": [
+                    {
+                        "finish_reason": "stop",
+                        "index": 0,
+                        "logprobs": None,
+                        "message": {
+                            "content": 'Here\'s one:\n\nWhy did the Linux kernel go to therapy?\n\nBecause it had a lot of "core" issues!\n\nHope that one made you laugh!',
+                            "refusal": None,
+                            "role": "assistant",
+                            "audio": None,
+                            "function_call": None,
+                            "tool_calls": [],
+                        },
+                    }
+                ],
+                "created": 1729243714,
+                "model": model,
+                "object": "chat.completion",
+                "service_tier": None,
+                "system_fingerprint": None,
+                "usage": {
+                    "completion_tokens": 32,
+                    "prompt_tokens": 16,
+                    "total_tokens": 48,
+                    "completion_tokens_details": None,
+                    "prompt_tokens_details": None,
+                },
+            }
+
+            model_response = ModelResponse(**response)
+            model_response._hidden_params = hidden_params
+            cost = completion_cost(model_response, custom_llm_provider="bedrock")
+
+            assert cost > 0
 
 
 # @pytest.mark.parametrize(
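
Note on the arithmetic the new checker enforces: AWS's pricing page quotes USD per 1,000 tokens, while litellm's cost map stores USD per single token, so _convert_str_to_float(...) / 1000 must reproduce the input_cost_per_token / output_cost_per_token values added in the JSON above (compared after rounding to 10 decimal places to sidestep float noise). A minimal sketch of that invariant, using the new ai21.jamba-1-5-large-v1:0 entry; the helper name and the "$0.002"/"$0.008" page strings are illustrative assumptions, not values scraped from the live page:

# Minimal sketch (not part of the patch): a per-1K page price divided by 1,000
# must equal the per-token cost stored in the model-cost JSON.
def price_per_1k_to_per_token(price_str: str) -> float:  # hypothetical helper
    return float(price_str.replace("$", "")) / 1000

# ai21.jamba-1-5-large-v1:0 from the diff: 0.000002 input / 0.000008 output per
# token, i.e. $0.002 / $0.008 per 1,000 tokens on the pricing page.
assert round(price_per_1k_to_per_token("$0.002"), 10) == round(0.000002, 10)
assert round(price_per_1k_to_per_token("$0.008"), 10) == round(0.000008, 10)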