Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00.
build(model_prices_and_context_window.json): add azure ai jamba instruct pricing + token details
Adds jamba instruct, mistral, llama3 pricing + token info for azure_ai
This commit is contained in:
parent
caa01d20cb
commit
b1be355d42
5 changed files with 116 additions and 3 deletions
|
@ -698,6 +698,44 @@
|
|||
"litellm_provider": "azure",
|
||||
"mode": "image_generation"
|
||||
},
|
||||
"azure_ai/jamba-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 70000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"output_cost_per_token": 0.0000007,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat"
|
||||
},
|
||||
"azure_ai/mistral-large": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.000004,
|
||||
"output_cost_per_token": 0.000012,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"azure_ai/mistral-small": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000003,
|
||||
"litellm_provider": "azure_ai",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"azure_ai/Meta-Llama-3-70B-Instruct": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.0000011,
|
||||
"output_cost_per_token": 0.00000037,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat"
|
||||
},
|
||||
"babbage-002": {
|
||||
"max_tokens": 16384,
|
||||
"max_input_tokens": 16384,
|
||||
|
|
|
@ -4,15 +4,21 @@ model_list:
|
|||
api_base: os.environ/AZURE_AI_MISTRAL_API_BASE
|
||||
api_key: os.environ/AZURE_AI_MISTRAL_API_KEY
|
||||
model: azure_ai/Mistral-large-nmefg
|
||||
input_cost_per_token: 0.00001
|
||||
output_cost_per_token: 0.000004
|
||||
- model_name: azure-ai-phi
|
||||
litellm_params:
|
||||
api_base: os.environ/AZURE_AI_PHI_API_BASE
|
||||
api_key: os.environ/AZURE_AI_PHI_API_KEY
|
||||
model: azure_ai/Phi-3-medium-128k-instruct-fpmvj
|
||||
- model_name: azure-ai-jamba-instruct
|
||||
litellm_params:
|
||||
api_base: "https://AI21-Jamba-Instruct-jpddv.eastus2.models.ai.azure.com"
|
||||
api_key: os.environ/AZURE_AI_JAMBA_API_KEY # NOTE(review): the original commit hardcoded a live API key on this line; it has been redacted here — the leaked credential must be rotated immediately.
|
||||
model: azure_ai/jamba-instruct
|
||||
|
||||
|
||||
general_settings:
|
||||
alerting: ["slack"]
|
||||
alerting_threshold: 10
|
||||
master_key: sk-1234
|
||||
pass_through_endpoints:
|
||||
|
|
|
@ -8,6 +8,7 @@ sys.path.insert(
|
|||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import asyncio
|
||||
import os
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
|
@ -785,6 +786,28 @@ def test_vertex_ai_embedding_completion_cost(caplog):
|
|||
# assert False
|
||||
|
||||
|
||||
def test_completion_azure_ai():
    """Smoke-test cost tracking for an azure_ai chat completion.

    Requires AZURE_AI_MISTRAL_API_BASE / AZURE_AI_MISTRAL_API_KEY in the
    environment; any exception is surfaced as a pytest failure.
    """
    try:
        # Force the locally bundled price map so the azure_ai entries added
        # in this commit are guaranteed to be loaded.
        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
        litellm.model_cost = litellm.get_model_cost_map(url="")

        litellm.set_verbose = True
        completion_kwargs = {
            "model": "azure_ai/Mistral-large-nmefg",
            "messages": [{"content": "what llm are you", "role": "user"}],
            "max_tokens": 15,
            "num_retries": 3,
            "api_base": os.getenv("AZURE_AI_MISTRAL_API_BASE"),
            "api_key": os.getenv("AZURE_AI_MISTRAL_API_KEY"),
        }
        response = litellm.completion(**completion_kwargs)
        print(response)

        # Cost tracking should attach a float response_cost to hidden params.
        hidden = response._hidden_params
        assert "response_cost" in hidden
        assert isinstance(hidden["response_cost"], float)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_completion_cost_hidden_params(sync_mode):
|
||||
|
|
|
@ -5573,8 +5573,16 @@ def convert_to_model_response_object(
|
|||
"system_fingerprint"
|
||||
]
|
||||
|
||||
if "model" in response_object and model_response_object.model is None:
|
||||
model_response_object.model = response_object["model"]
|
||||
if "model" in response_object:
|
||||
if model_response_object.model is None:
|
||||
model_response_object.model = response_object["model"]
|
||||
elif "/" in model_response_object.model:
|
||||
openai_compatible_provider = model_response_object.model.split("/")[
|
||||
0
|
||||
]
|
||||
model_response_object.model = (
|
||||
openai_compatible_provider + "/" + response_object["model"]
|
||||
)
|
||||
|
||||
if start_time is not None and end_time is not None:
|
||||
if isinstance(start_time, type(end_time)):
|
||||
|
|
|
@ -698,6 +698,44 @@
|
|||
"litellm_provider": "azure",
|
||||
"mode": "image_generation"
|
||||
},
|
||||
"azure_ai/jamba-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 70000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"output_cost_per_token": 0.0000007,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat"
|
||||
},
|
||||
"azure_ai/mistral-large": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.000004,
|
||||
"output_cost_per_token": 0.000012,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"azure_ai/mistral-small": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000003,
|
||||
"litellm_provider": "azure_ai",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"azure_ai/Meta-Llama-3-70B-Instruct": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.0000011,
|
||||
"output_cost_per_token": 0.00000037,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat"
|
||||
},
|
||||
"babbage-002": {
|
||||
"max_tokens": 16384,
|
||||
"max_input_tokens": 16384,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue