build(model_prices_and_context_window.json): add azure ai jamba instruct pricing + token details

Adds Jamba Instruct, Mistral Large/Small, and Meta Llama 3 70B Instruct pricing + token limits for the azure_ai provider
Krrish Dholakia 2024-07-13 16:34:31 -07:00
parent caa01d20cb
commit b1be355d42
5 changed files with 116 additions and 3 deletions
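For scale, the prices below are per token: a jamba-instruct call with 1,000 input and 200 output tokens prices out at 1000 * 0.0000005 + 200 * 0.0000007 = $0.00064, i.e. $0.50 / $0.70 per 1M input/output tokens.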

@@ -698,6 +698,44 @@
         "litellm_provider": "azure",
         "mode": "image_generation"
     },
+    "azure_ai/jamba-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 70000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0000005,
+        "output_cost_per_token": 0.0000007,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
+    "azure_ai/mistral-large": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000004,
+        "output_cost_per_token": 0.000012,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure_ai/mistral-small": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "azure_ai",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "azure_ai/Meta-Llama-3-70B-Instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0000011,
+        "output_cost_per_token": 0.00000037,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
     "babbage-002": {
         "max_tokens": 16384,
         "max_input_tokens": 16384,

@@ -4,15 +4,21 @@ model_list:
       api_base: os.environ/AZURE_AI_MISTRAL_API_BASE
       api_key: os.environ/AZURE_AI_MISTRAL_API_KEY
       model: azure_ai/Mistral-large-nmefg
+      input_cost_per_token: 0.00001
+      output_cost_per_token: 0.000004
   - model_name: azure-ai-phi
     litellm_params:
       api_base: os.environ/AZURE_AI_PHI_API_BASE
       api_key: os.environ/AZURE_AI_PHI_API_KEY
       model: azure_ai/Phi-3-medium-128k-instruct-fpmvj
+  - model_name: azure-ai-jamba-instruct
+    litellm_params:
+      api_base: "https://AI21-Jamba-Instruct-jpddv.eastus2.models.ai.azure.com"
+      api_key: "WJkvJneEcBMhFqK8zZBaAVw9cl4Ec5Pb"
+      model: azure_ai/jamba-instruct
 
 general_settings:
-  alerting: ["slack"]
   alerting_threshold: 10
   master_key: sk-1234
   pass_through_endpoints:
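
A hedged usage sketch, not in the diff: once the proxy is running with this config, the new deployment is callable by its model_name through any OpenAI-compatible client. The localhost:4000 base_url is an assumption (litellm's default proxy port); the key is the master_key from the config.

import openai

# Assumes the proxy from the config above is running on its default port.
client = openai.OpenAI(base_url="http://localhost:4000", api_key="sk-1234")
response = client.chat.completions.create(
    model="azure-ai-jamba-instruct",  # model_name defined in the config
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response.choices[0].message.content)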

@@ -8,6 +8,7 @@ sys.path.insert(
     0, os.path.abspath("../..")
 ) # Adds the parent directory to the system path
 import asyncio
+import os
 import time
 from typing import Optional

@@ -785,6 +786,28 @@ def test_vertex_ai_embedding_completion_cost(caplog):
     # assert False


+def test_completion_azure_ai():
+    try:
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+
+        litellm.set_verbose = True
+        response = litellm.completion(
+            model="azure_ai/Mistral-large-nmefg",
+            messages=[{"content": "what llm are you", "role": "user"}],
+            max_tokens=15,
+            num_retries=3,
+            api_base=os.getenv("AZURE_AI_MISTRAL_API_BASE"),
+            api_key=os.getenv("AZURE_AI_MISTRAL_API_KEY"),
+        )
+        print(response)
+
+        assert "response_cost" in response._hidden_params
+        assert isinstance(response._hidden_params["response_cost"], float)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
 async def test_completion_cost_hidden_params(sync_mode):
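
Note: LITELLM_LOCAL_MODEL_COST_MAP="True" makes get_model_cost_map(url="") return the pricing JSON bundled with the package instead of fetching the hosted map, so the assertions exercise the entries added in this commit; this is also why the same block of entries is mirrored into the bundled backup copy of the pricing file in the last diff below.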

@@ -5573,8 +5573,16 @@ def convert_to_model_response_object(
                 "system_fingerprint"
             ]

-        if "model" in response_object and model_response_object.model is None:
-            model_response_object.model = response_object["model"]
+        if "model" in response_object:
+            if model_response_object.model is None:
+                model_response_object.model = response_object["model"]
+            elif "/" in model_response_object.model:
+                openai_compatible_provider = model_response_object.model.split("/")[
+                    0
+                ]
+                model_response_object.model = (
+                    openai_compatible_provider + "/" + response_object["model"]
+                )

         if start_time is not None and end_time is not None:
             if isinstance(start_time, type(end_time)):
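
The new elif covers OpenAI-compatible providers whose raw responses report a bare model name: the requested model keeps its provider prefix, so downstream cost lookups still hit the azure_ai/* keys added above. A standalone sketch of the string logic, with hypothetical values:

# Illustration of the branch above; the values are hypothetical.
requested = "azure_ai/jamba-instruct"  # model_response_object.model
returned = "jamba-instruct"            # response_object["model"] from upstream
if "/" in requested:
    provider = requested.split("/")[0]  # "azure_ai"
    final = provider + "/" + returned   # "azure_ai/jamba-instruct"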

@@ -698,6 +698,44 @@
         "litellm_provider": "azure",
         "mode": "image_generation"
     },
+    "azure_ai/jamba-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 70000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0000005,
+        "output_cost_per_token": 0.0000007,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
+    "azure_ai/mistral-large": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000004,
+        "output_cost_per_token": 0.000012,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure_ai/mistral-small": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "azure_ai",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "azure_ai/Meta-Llama-3-70B-Instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0000011,
+        "output_cost_per_token": 0.00000037,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
     "babbage-002": {
         "max_tokens": 16384,
         "max_input_tokens": 16384,