build(model_prices_and_context_window.json): add azure ai jamba instruct pricing + token details

Adds Jamba Instruct, Mistral Large/Small, and Meta Llama 3 70B Instruct pricing + token limits for the azure_ai provider
Krrish Dholakia 2024-07-13 16:34:31 -07:00
parent caa01d20cb
commit b1be355d42
5 changed files with 116 additions and 3 deletions
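For scale, the prices below are per token: a jamba-instruct call with 1,000 input and 200 output tokens prices out at 1000 * 0.0000005 + 200 * 0.0000007 = $0.00064, i.e. $0.50 / $0.70 per 1M input/output tokens.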

@@ -698,6 +698,44 @@
         "litellm_provider": "azure",
         "mode": "image_generation"
     },
+    "azure_ai/jamba-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 70000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0000005,
+        "output_cost_per_token": 0.0000007,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
+    "azure_ai/mistral-large": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000004,
+        "output_cost_per_token": 0.000012,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure_ai/mistral-small": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "azure_ai",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "azure_ai/Meta-Llama-3-70B-Instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0000011,
+        "output_cost_per_token": 0.00000037,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
     "babbage-002": {
         "max_tokens": 16384,
         "max_input_tokens": 16384,

@@ -4,15 +4,21 @@ model_list:
       api_base: os.environ/AZURE_AI_MISTRAL_API_BASE
       api_key: os.environ/AZURE_AI_MISTRAL_API_KEY
       model: azure_ai/Mistral-large-nmefg
+      input_cost_per_token: 0.00001
+      output_cost_per_token: 0.000004
   - model_name: azure-ai-phi
     litellm_params:
       api_base: os.environ/AZURE_AI_PHI_API_BASE
       api_key: os.environ/AZURE_AI_PHI_API_KEY
       model: azure_ai/Phi-3-medium-128k-instruct-fpmvj
+  - model_name: azure-ai-jamba-instruct
+    litellm_params:
+      api_base: "https://AI21-Jamba-Instruct-jpddv.eastus2.models.ai.azure.com"
+      api_key: "WJkvJneEcBMhFqK8zZBaAVw9cl4Ec5Pb"
+      model: azure_ai/jamba-instruct
 
 general_settings:
-  alerting: ["slack"]
   alerting_threshold: 10
   master_key: sk-1234
   pass_through_endpoints:
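
A hedged usage sketch, not in the diff: once the proxy is running with this config, the new deployment is callable by its model_name through any OpenAI-compatible client. The localhost:4000 base_url is an assumption (litellm's default proxy port); the key is the master_key from the config.

import openai

# Assumes the proxy from the config above is running on its default port.
client = openai.OpenAI(base_url="http://localhost:4000", api_key="sk-1234")
response = client.chat.completions.create(
    model="azure-ai-jamba-instruct",  # model_name defined in the config
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response.choices[0].message.content)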

@@ -8,6 +8,7 @@ sys.path.insert(
     0, os.path.abspath("../..")
 ) # Adds the parent directory to the system path
 import asyncio
+import os
 import time
 from typing import Optional

@@ -785,6 +786,28 @@ def test_vertex_ai_embedding_completion_cost(caplog):
     # assert False


+def test_completion_azure_ai():
+    try:
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+
+        litellm.set_verbose = True
+        response = litellm.completion(
+            model="azure_ai/Mistral-large-nmefg",
+            messages=[{"content": "what llm are you", "role": "user"}],
+            max_tokens=15,
+            num_retries=3,
+            api_base=os.getenv("AZURE_AI_MISTRAL_API_BASE"),
+            api_key=os.getenv("AZURE_AI_MISTRAL_API_KEY"),
+        )
+        print(response)
+
+        assert "response_cost" in response._hidden_params
+        assert isinstance(response._hidden_params["response_cost"], float)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
 async def test_completion_cost_hidden_params(sync_mode):
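
Note: LITELLM_LOCAL_MODEL_COST_MAP="True" makes get_model_cost_map(url="") return the pricing JSON bundled with the package instead of fetching the hosted map, so the assertions exercise the entries added in this commit; this is also why the same block of entries is mirrored into the bundled backup copy of the pricing file in the last diff below.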

@@ -5573,8 +5573,16 @@ def convert_to_model_response_object(
                 "system_fingerprint"
             ]

-        if "model" in response_object and model_response_object.model is None:
-            model_response_object.model = response_object["model"]
+        if "model" in response_object:
+            if model_response_object.model is None:
+                model_response_object.model = response_object["model"]
+            elif "/" in model_response_object.model:
+                openai_compatible_provider = model_response_object.model.split("/")[
+                    0
+                ]
+                model_response_object.model = (
+                    openai_compatible_provider + "/" + response_object["model"]
+                )

         if start_time is not None and end_time is not None:
             if isinstance(start_time, type(end_time)):
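
The new elif covers OpenAI-compatible providers whose raw responses report a bare model name: the requested model keeps its provider prefix, so downstream cost lookups still hit the azure_ai/* keys added above. A standalone sketch of the string logic, with hypothetical values:

# Illustration of the branch above; the values are hypothetical.
requested = "azure_ai/jamba-instruct"  # model_response_object.model
returned = "jamba-instruct"            # response_object["model"] from upstream
if "/" in requested:
    provider = requested.split("/")[0]  # "azure_ai"
    final = provider + "/" + returned   # "azure_ai/jamba-instruct"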

@@ -698,6 +698,44 @@
         "litellm_provider": "azure",
         "mode": "image_generation"
     },
+    "azure_ai/jamba-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 70000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0000005,
+        "output_cost_per_token": 0.0000007,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
+    "azure_ai/mistral-large": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000004,
+        "output_cost_per_token": 0.000012,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure_ai/mistral-small": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "azure_ai",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "azure_ai/Meta-Llama-3-70B-Instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0000011,
+        "output_cost_per_token": 0.00000037,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
     "babbage-002": {
         "max_tokens": 16384,
         "max_input_tokens": 16384,