Fix bedrock model pricing + add unit test using bedrock pricing api (#7978)

* test(test_completion_cost.py): add unit tests ensuring cost is tracked for all bedrock models that include a region name

* feat: initial script to get bedrock pricing from amazon api

ensures bedrock pricing is accurate

* build(model_prices_and_context_window.json): correct bedrock model prices based on api check

ensures accurate bedrock pricing

* ci(config.yml): add bedrock pricing check to ci/cd

ensures litellm always maintains up-to-date pricing for bedrock models

* ci(config.yml): add beautiful soup to ci/cd

* test: bump groq model

* test: fix test
This commit is contained in:
Krish Dholakia 2025-01-28 17:57:49 -08:00 committed by GitHub
parent 8eaa5dc797
commit 9c20c69915
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 356 additions and 24 deletions

View file

@ -986,6 +986,7 @@ jobs:
pip install ruff
pip install pylint
pip install pyright
pip install beautifulsoup4
pip install .
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
- run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
@ -995,6 +996,7 @@ jobs:
- run: python ./tests/code_coverage_tests/recursive_detector.py
- run: python ./tests/code_coverage_tests/test_router_strategy_async.py
- run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py
- run: python ./tests/code_coverage_tests/bedrock_pricing.py
- run: python ./tests/documentation_tests/test_env_keys.py
- run: python ./tests/documentation_tests/test_router_settings.py
- run: python ./tests/documentation_tests/test_api_docs.py

View file

@ -434,7 +434,6 @@ BEDROCK_CONVERSE_MODELS = [
"meta.llama3-2-3b-instruct-v1:0",
"meta.llama3-2-11b-instruct-v1:0",
"meta.llama3-2-90b-instruct-v1:0",
"meta.llama3-2-405b-instruct-v1:0",
]
####### COMPLETION MODELS ###################
open_ai_chat_completion_models: List = []
@ -1254,7 +1253,7 @@ from .proxy.proxy_cli import run_server
from .router import Router
from .assistants.main import *
from .batches.main import *
from .batch_completion.main import *
from .batch_completion.main import * # type: ignore
from .rerank_api.main import *
from .realtime_api.main import _arealtime
from .fine_tuning.main import *

View file

@ -5224,6 +5224,24 @@
"mode": "chat",
"supports_system_messages": true
},
"ai21.jamba-1-5-large-v1:0": {
"max_tokens": 256000,
"max_input_tokens": 256000,
"max_output_tokens": 256000,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000008,
"litellm_provider": "bedrock",
"mode": "chat"
},
"ai21.jamba-1-5-mini-v1:0": {
"max_tokens": 256000,
"max_input_tokens": 256000,
"max_output_tokens": 256000,
"input_cost_per_token": 0.0000002,
"output_cost_per_token": 0.0000004,
"litellm_provider": "bedrock",
"mode": "chat"
},
"amazon.titan-text-lite-v1": {
"max_tokens": 4000,
"max_input_tokens": 42000,
@ -5542,8 +5560,8 @@
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000005,
"input_cost_per_token": 0.0000008,
"output_cost_per_token": 0.000004,
"litellm_provider": "bedrock",
"mode": "chat",
"supports_assistant_prefill": true,
@ -5612,8 +5630,8 @@
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000005,
"input_cost_per_token": 0.0000008,
"output_cost_per_token": 0.000004,
"litellm_provider": "bedrock",
"mode": "chat",
"supports_assistant_prefill": true,
@ -5682,8 +5700,8 @@
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000005,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.00000125,
"litellm_provider": "bedrock",
"mode": "chat",
"supports_function_calling": true,
@ -6057,8 +6075,8 @@
"max_tokens": 8191,
"max_input_tokens": 100000,
"max_output_tokens": 8191,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551,
"input_cost_per_token": 0.0000008,
"output_cost_per_token": 0.0000024,
"litellm_provider": "bedrock",
"mode": "chat"
},

View file

@ -5224,6 +5224,24 @@
"mode": "chat",
"supports_system_messages": true
},
"ai21.jamba-1-5-large-v1:0": {
"max_tokens": 256000,
"max_input_tokens": 256000,
"max_output_tokens": 256000,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000008,
"litellm_provider": "bedrock",
"mode": "chat"
},
"ai21.jamba-1-5-mini-v1:0": {
"max_tokens": 256000,
"max_input_tokens": 256000,
"max_output_tokens": 256000,
"input_cost_per_token": 0.0000002,
"output_cost_per_token": 0.0000004,
"litellm_provider": "bedrock",
"mode": "chat"
},
"amazon.titan-text-lite-v1": {
"max_tokens": 4000,
"max_input_tokens": 42000,
@ -5542,8 +5560,8 @@
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000005,
"input_cost_per_token": 0.0000008,
"output_cost_per_token": 0.000004,
"litellm_provider": "bedrock",
"mode": "chat",
"supports_assistant_prefill": true,
@ -5612,8 +5630,8 @@
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000005,
"input_cost_per_token": 0.0000008,
"output_cost_per_token": 0.000004,
"litellm_provider": "bedrock",
"mode": "chat",
"supports_assistant_prefill": true,
@ -5682,8 +5700,8 @@
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000005,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.00000125,
"litellm_provider": "bedrock",
"mode": "chat",
"supports_function_calling": true,
@ -6057,8 +6075,8 @@
"max_tokens": 8191,
"max_input_tokens": 100000,
"max_output_tokens": 8191,
"input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551,
"input_cost_per_token": 0.0000008,
"output_cost_per_token": 0.0000024,
"litellm_provider": "bedrock",
"mode": "chat"
},

View file

@ -0,0 +1,243 @@
import os
import sys
sys.path.insert(0, os.path.abspath("../.."))
import litellm
import requests
from bs4 import BeautifulSoup
# URL of the AWS Bedrock Pricing page
PRICING_URL = "https://aws.amazon.com/bedrock/pricing/"
# List of providers to extract pricing for
PROVIDERS = ["ai21", "anthropic", "meta", "cohere", "mistral", "stability", "amazon"]
def extract_amazon_pricing(section):
"""
Extracts pricing data for Amazon-specific models.
Args:
section (Tag): The BeautifulSoup Tag object for the Amazon section.
Returns:
dict: Pricing data for Amazon models.
"""
tabs = section.find_all("li", class_="lb-tabs-trigger")
panels = section.find_all("li", class_="lb-tabs-content-item")
amazon_pricing = {}
for tab, panel in zip(tabs, panels):
model_name = tab.get_text(strip=True)
table = panel.find("table")
if not table:
amazon_pricing[model_name] = "Pricing table not found"
continue
# Parse the table
rows = table.find_all("tr")
headers = [header.get_text(strip=True) for header in rows[0].find_all("td")]
model_pricing = {}
for row in rows[1:]:
cols = row.find_all("td")
if len(cols) < 3:
continue # Skip rows with insufficient data
feature_name = cols[0].get_text(strip=True)
input_price = cols[1].get_text(strip=True)
output_price = cols[2].get_text(strip=True)
model_pricing[feature_name] = {
headers[1]: input_price,
headers[2]: output_price,
}
amazon_pricing[model_name] = model_pricing
return amazon_pricing
def get_bedrock_pricing(url, providers):
"""
Fetches and parses AWS Bedrock pricing for specified providers.
Args:
url (str): URL of the AWS Bedrock pricing page.
providers (list): List of providers to extract pricing for.
Returns:
dict: A dictionary containing pricing data for the providers.
"""
response = requests.get(url)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
pricing_data = {}
for provider in providers:
if provider == "amazon":
section = soup.find(
"li",
class_="lb-tabs-accordion-trigger",
text=lambda t: t and "Amazon" in t,
)
if not section:
pricing_data[provider] = "Amazon section not found"
continue
amazon_section = section.find_next("li", class_="lb-tabs-content-item")
if not amazon_section:
pricing_data[provider] = "Amazon models section not found"
continue
pricing_data[provider] = extract_amazon_pricing(amazon_section)
else:
# General logic for other providers
section = soup.find(
"h2", text=lambda t: t and provider.lower() in t.lower()
)
if not section:
pricing_data[provider] = "Provider section not found"
continue
table = section.find_next("table")
if not table:
pricing_data[provider] = "Pricing table not found"
continue
rows = table.find_all("tr")
headers = [header.get_text(strip=True) for header in rows[0].find_all("td")]
provider_pricing = {}
for row in rows[1:]:
cols = row.find_all("td")
if len(cols) < 3:
continue
model_name = cols[0].get_text(strip=True)
input_price = cols[1].get_text(strip=True)
output_price = cols[2].get_text(strip=True)
provider_pricing[model_name] = {
"Price per 1,000 input tokens": input_price,
"Price per 1,000 output tokens": output_price,
}
pricing_data[provider] = provider_pricing
return pricing_data
model_substring_map = {
"ai21": {"jurassic-2": "j2"},
"anthropic": {"claude-2-1": "claude-v2:1", "claude-2-0": "claude-v2"},
"meta": {"llama-2-chat-(13b)": "llama2-13b-chat"},
"cohere": {
"r+": "r-plus",
"embed-3-english": "embed-english-v3",
"embed-3-multilingual": "embed-multilingual-v3",
},
} # aliases used by bedrock in their real model name vs. pricing page
def _handle_meta_model_name(model_name: str) -> str:
# Check if it's a Llama 2 chat model
if "llama-2-chat-" in model_name.lower():
# Extract the size (e.g., 13b, 70b) using string manipulation
# Look for pattern between "chat-(" and ")"
import re
if match := re.search(r"chat-\((\d+b)\)", model_name.lower()):
size = match.group(1)
return f"meta.llama2-{size}-chat"
return model_name
def _handle_cohere_model_name(model_name: str) -> str:
if model_name.endswith("command-r"):
return "cohere.command-r-v1"
return model_name
def _create_bedrock_model_name(provider: str, model_name: str):
complete_model_name = f"{provider.lower()}.{model_name.replace(' ', '-').replace('.', '-').replace('*', '').lower()}"
for provider_key, map in model_substring_map.items():
if provider_key == provider:
for model_substring, replacement in map.items():
print(
f"model_substring: {model_substring}, replacement: {replacement}, received model_name: {model_name}"
)
if model_substring in complete_model_name:
print(f"model_name: {complete_model_name}")
complete_model_name = complete_model_name.replace(
model_substring, replacement
)
print(f"model_name: {complete_model_name}")
if provider == "meta":
complete_model_name = _handle_meta_model_name(complete_model_name)
if provider == "cohere":
complete_model_name = _handle_cohere_model_name(complete_model_name)
return complete_model_name
def _convert_str_to_float(price_str: str) -> float:
if "$" not in price_str:
return 0.0
return float(price_str.replace("$", ""))
def _check_if_model_name_in_pricing(
bedrock_model_name: str,
input_cost_per_1k_tokens: str,
output_cost_per_1k_tokens: str,
):
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
for model, value in litellm.model_cost.items():
if model.startswith(bedrock_model_name):
input_cost_per_token = (
_convert_str_to_float(input_cost_per_1k_tokens) / 1000
)
output_cost_per_token = (
_convert_str_to_float(output_cost_per_1k_tokens) / 1000
)
assert round(value["input_cost_per_token"], 10) == round(
input_cost_per_token, 10
), f"Invalid input cost per token for {model} \n Bedrock pricing page name={bedrock_model_name} \n Got={value['input_cost_per_token']}, Expected={input_cost_per_token}"
assert round(value["output_cost_per_token"], 10) == round(
output_cost_per_token, 10
), f"Invalid output cost per token for {model} \n Bedrock pricing page name={bedrock_model_name} \n Got={value['output_cost_per_token']}, Expected={output_cost_per_token}"
return True
return False
if __name__ == "__main__":
try:
pricing = get_bedrock_pricing(PRICING_URL, PROVIDERS)
print("AWS Bedrock On-Demand Pricing:")
for provider, data in pricing.items():
print(f"\n{provider.capitalize()}:")
if isinstance(data, dict):
for model, details in data.items():
complete_model_name = _create_bedrock_model_name(provider, model)
print(f"details: {details}")
assert _check_if_model_name_in_pricing(
bedrock_model_name=complete_model_name,
input_cost_per_1k_tokens=details[
"Price per 1,000 input tokens"
],
output_cost_per_1k_tokens=details[
"Price per 1,000 output tokens"
],
), f"Model {complete_model_name} not found in litellm.model_cost"
print(f" {complete_model_name}:")
if isinstance(details, dict):
for detail, value in details.items():
print(f" {detail}: {value}")
else:
print(f" {details}")
else:
print(f" {data}")
except requests.RequestException as e:
print(f"Error fetching pricing data: {e}")

View file

@ -2766,12 +2766,64 @@ def test_add_known_models():
def test_bedrock_cost_calc_with_region():
from litellm import completion
response = completion(
model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
messages=[{"role": "user", "content": "Hello, how are you?"}],
aws_region_name="us-east-1",
)
assert response._hidden_params["response_cost"] > 0
from litellm import ModelResponse
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
hidden_params = {
"custom_llm_provider": "bedrock",
"region_name": "us-east-1",
"optional_params": {},
"litellm_call_id": "cf371a5d-679b-410f-b862-8084676d6d59",
"model_id": None,
"api_base": None,
"response_cost": 0.0005639999999999999,
"additional_headers": {},
}
litellm.set_verbose = True
bedrock_models = litellm.bedrock_models + litellm.bedrock_converse_models
for model in bedrock_models:
if litellm.model_cost[model]["mode"] == "chat":
response = {
"id": "cmpl-55db75e0b05344058b0bd8ee4e00bf84",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": None,
"message": {
"content": 'Here\'s one:\n\nWhy did the Linux kernel go to therapy?\n\nBecause it had a lot of "core" issues!\n\nHope that one made you laugh!',
"refusal": None,
"role": "assistant",
"audio": None,
"function_call": None,
"tool_calls": [],
},
}
],
"created": 1729243714,
"model": model,
"object": "chat.completion",
"service_tier": None,
"system_fingerprint": None,
"usage": {
"completion_tokens": 32,
"prompt_tokens": 16,
"total_tokens": 48,
"completion_tokens_details": None,
"prompt_tokens_details": None,
},
}
model_response = ModelResponse(**response)
model_response._hidden_params = hidden_params
cost = completion_cost(model_response, custom_llm_provider="bedrock")
assert cost > 0
# @pytest.mark.parametrize(