From 3ec4d1b1fc916dd621359362faebaec412e988a9 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 23 May 2024 17:08:51 -0700 Subject: [PATCH] build(model_prices_and_context_window.json): add databricks models to model cost map --- ...odel_prices_and_context_window_backup.json | 98 ++++++++++++++++--- model_prices_and_context_window.json | 98 ++++++++++++++++--- 2 files changed, 168 insertions(+), 28 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 7b62772ac..748a3f6ae 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1605,36 +1605,36 @@ "mode": "chat" }, "replicate/meta/llama-3-70b": { - "max_tokens": 4096, - "max_input_tokens": 4096, - "max_output_tokens": 4096, + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, "input_cost_per_token": 0.00000065, "output_cost_per_token": 0.00000275, "litellm_provider": "replicate", "mode": "chat" }, "replicate/meta/llama-3-70b-instruct": { - "max_tokens": 4096, - "max_input_tokens": 4096, - "max_output_tokens": 4096, + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, "input_cost_per_token": 0.00000065, "output_cost_per_token": 0.00000275, "litellm_provider": "replicate", "mode": "chat" }, "replicate/meta/llama-3-8b": { - "max_tokens": 4096, - "max_input_tokens": 4096, - "max_output_tokens": 4096, + "max_tokens": 8086, + "max_input_tokens": 8086, + "max_output_tokens": 8086, "input_cost_per_token": 0.00000005, "output_cost_per_token": 0.00000025, "litellm_provider": "replicate", "mode": "chat" }, "replicate/meta/llama-3-8b-instruct": { - "max_tokens": 4096, - "max_input_tokens": 4096, - "max_output_tokens": 4096, + "max_tokens": 8086, + "max_input_tokens": 8086, + "max_output_tokens": 8086, "input_cost_per_token": 0.00000005, "output_cost_per_token": 0.00000025, "litellm_provider": "replicate", @@ -1898,7 +1898,7 @@ "mode": "chat" }, "openrouter/meta-llama/codellama-34b-instruct": { - "max_tokens": 8096, + "max_tokens": 8192, "input_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000005, "litellm_provider": "openrouter", @@ -3538,6 +3538,76 @@ "output_cost_per_token": 0.000000, "litellm_provider": "voyage", "mode": "embedding" - } + }, + "databricks/databricks-dbrx-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.00000075, + "output_cost_per_token": 0.00000225, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-meta-llama-3-70b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-llama-2-70b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-mixtral-8x7b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.000001, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-mpt-30b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-mpt-7b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-bge-large-en": { + "max_tokens": 512, + "max_input_tokens": 512, + "output_vector_size": 1024, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0, + "litellm_provider": "databricks", + "mode": "embedding", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + } } diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 7b62772ac..748a3f6ae 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1605,36 +1605,36 @@ "mode": "chat" }, "replicate/meta/llama-3-70b": { - "max_tokens": 4096, - "max_input_tokens": 4096, - "max_output_tokens": 4096, + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, "input_cost_per_token": 0.00000065, "output_cost_per_token": 0.00000275, "litellm_provider": "replicate", "mode": "chat" }, "replicate/meta/llama-3-70b-instruct": { - "max_tokens": 4096, - "max_input_tokens": 4096, - "max_output_tokens": 4096, + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, "input_cost_per_token": 0.00000065, "output_cost_per_token": 0.00000275, "litellm_provider": "replicate", "mode": "chat" }, "replicate/meta/llama-3-8b": { - "max_tokens": 4096, - "max_input_tokens": 4096, - "max_output_tokens": 4096, + "max_tokens": 8086, + "max_input_tokens": 8086, + "max_output_tokens": 8086, "input_cost_per_token": 0.00000005, "output_cost_per_token": 0.00000025, "litellm_provider": "replicate", "mode": "chat" }, "replicate/meta/llama-3-8b-instruct": { - "max_tokens": 4096, - "max_input_tokens": 4096, - "max_output_tokens": 4096, + "max_tokens": 8086, + "max_input_tokens": 8086, + "max_output_tokens": 8086, "input_cost_per_token": 0.00000005, "output_cost_per_token": 0.00000025, "litellm_provider": "replicate", @@ -1898,7 +1898,7 @@ "mode": "chat" }, "openrouter/meta-llama/codellama-34b-instruct": { - "max_tokens": 8096, + "max_tokens": 8192, "input_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000005, "litellm_provider": "openrouter", @@ -3538,6 +3538,76 @@ "output_cost_per_token": 0.000000, "litellm_provider": "voyage", "mode": "embedding" - } + }, + "databricks/databricks-dbrx-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.00000075, + "output_cost_per_token": 0.00000225, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-meta-llama-3-70b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-llama-2-70b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-mixtral-8x7b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.000001, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-mpt-30b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-mpt-7b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "databricks", + "mode": "chat", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-bge-large-en": { + "max_tokens": 512, + "max_input_tokens": 512, + "output_vector_size": 1024, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0, + "litellm_provider": "databricks", + "mode": "embedding", + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + } }