From 5fe77499d29e4369aa7c83059ff9efb4da0230f5 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Wed, 11 Dec 2024 13:01:22 -0800
Subject: [PATCH] build(model_prices_and_context_window.json): add new dbrx
 llama 3.3 model

fixes llama cost calc on databricks
---
 litellm/model_prices_and_context_window_backup.json | 13 +++++++++++++
 litellm/utils.py                                    |  4 +++-
 model_prices_and_context_window.json                | 13 +++++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 6099e22f23..521ecc2273 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -7265,6 +7265,19 @@
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
+    "databricks/meta-llama-3.3-70b-instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
+    },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
diff --git a/litellm/utils.py b/litellm/utils.py
index 3bbab724a8..d0afd1831a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4377,6 +4377,9 @@ def _strip_model_name(model: str, custom_llm_provider: Optional[str]) -> str:
     ):
         strip_version = _strip_stable_vertex_version(model_name=model)
         return strip_version
+    elif custom_llm_provider and (custom_llm_provider == "databricks"):
+        strip_version = _strip_stable_vertex_version(model_name=model)
+        return strip_version
     else:
         strip_finetune = _strip_openai_finetune_model_name(model_name=model)
         return strip_finetune
@@ -4542,7 +4545,6 @@ def get_model_info(  # noqa: PLR0915
             )

         #########################
-
         supported_openai_params = litellm.get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
         )
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 6099e22f23..521ecc2273 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -7265,6 +7265,19 @@
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
+    "databricks/meta-llama-3.3-70b-instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
+    },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,