build(model_prices_and_context_window.json): add new Databricks Llama 3.3 70B model

Fixes Llama cost calculation on Databricks.
2025-04-26 03:04:13 +00:00 · 2024-12-11 13:01:22 -08:00 · 2024-12-11 13:01:22 -08:00 · 5fe77499d2
commit 5fe77499d2
parent 74917d7b16
3 changed files with 29 additions and 1 deletion
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -7265,6 +7265,19 @@
        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
    },
    "databricks/meta-llama-3.3-70b-instruct": {
        "max_tokens": 128000,
        "max_input_tokens": 128000,
        "max_output_tokens": 128000, 
        "input_cost_per_token": 0.00000100002,
        "input_dbu_cost_per_token": 0.000014286,
        "output_cost_per_token": 0.00000299999,
        "output_dbu_cost_per_token": 0.000042857,
        "litellm_provider": "databricks",
        "mode": "chat",
        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
    },
    "databricks/databricks-dbrx-instruct": {
        "max_tokens": 32768,
        "max_input_tokens": 32768,
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -4377,6 +4377,9 @@ def _strip_model_name(model: str, custom_llm_provider: Optional[str]) -> str:
    ):
        strip_version = _strip_stable_vertex_version(model_name=model)
        return strip_version
    elif custom_llm_provider and (custom_llm_provider == "databricks"):
        strip_version = _strip_stable_vertex_version(model_name=model)
        return strip_version
    else:
        strip_finetune = _strip_openai_finetune_model_name(model_name=model)
        return strip_finetune
@ -4542,7 +4545,6 @@ def get_model_info(  # noqa: PLR0915
            )
        #########################
        supported_openai_params = litellm.get_supported_openai_params(
            model=model, custom_llm_provider=custom_llm_provider
        )
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -7265,6 +7265,19 @@
        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
    },
    "databricks/meta-llama-3.3-70b-instruct": {
        "max_tokens": 128000,
        "max_input_tokens": 128000,
        "max_output_tokens": 128000, 
        "input_cost_per_token": 0.00000100002,
        "input_dbu_cost_per_token": 0.000014286,
        "output_cost_per_token": 0.00000299999,
        "output_dbu_cost_per_token": 0.000042857,
        "litellm_provider": "databricks",
        "mode": "chat",
        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
    },
    "databricks/databricks-dbrx-instruct": {
        "max_tokens": 32768,
        "max_input_tokens": 32768,