forked from phoenix/litellm-mirror
build(model_prices_and_context_window.json): add azure ai jamba instruct pricing + token details
Adds jamba instruct, mistral, llama3 pricing + token info for azure_ai
This commit is contained in:
parent caa01d20cb
commit b1be355d42
5 changed files with 116 additions and 3 deletions
@@ -698,6 +698,44 @@
         "litellm_provider": "azure",
         "mode": "image_generation"
     },
+    "azure_ai/jamba-instruct": {
+        "max_tokens": 4096,
+        "max_input_tokens": 70000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0000005,
+        "output_cost_per_token": 0.0000007,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
+    "azure_ai/mistral-large": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000004,
+        "output_cost_per_token": 0.000012,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "azure_ai/mistral-small": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "azure_ai",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "azure_ai/Meta-Llama-3-70B-Instruct": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0000011,
+        "output_cost_per_token": 0.00000037,
+        "litellm_provider": "azure_ai",
+        "mode": "chat"
+    },
     "babbage-002": {
         "max_tokens": 16384,
         "max_input_tokens": 16384,
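The per-token prices added above translate directly into a per-request cost. A minimal sketch of that arithmetic for azure_ai/jamba-instruct, assuming hypothetical token counts (1,000 prompt / 200 completion tokens, not taken from this commit):

# Illustrative cost arithmetic using the azure_ai/jamba-instruct prices above.
# The token counts are hypothetical; in practice they come from the usage block
# returned by the provider.
input_cost_per_token = 0.0000005
output_cost_per_token = 0.0000007

prompt_tokens = 1000     # hypothetical
completion_tokens = 200  # hypothetical

response_cost = (
    prompt_tokens * input_cost_per_token
    + completion_tokens * output_cost_per_token
)
print(f"${response_cost:.6f}")  # $0.000640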
@@ -4,15 +4,21 @@ model_list:
       api_base: os.environ/AZURE_AI_MISTRAL_API_BASE
       api_key: os.environ/AZURE_AI_MISTRAL_API_KEY
       model: azure_ai/Mistral-large-nmefg
+      input_cost_per_token: 0.00001
+      output_cost_per_token: 0.000004
   - model_name: azure-ai-phi
     litellm_params:
       api_base: os.environ/AZURE_AI_PHI_API_BASE
       api_key: os.environ/AZURE_AI_PHI_API_KEY
       model: azure_ai/Phi-3-medium-128k-instruct-fpmvj
+  - model_name: azure-ai-jamba-instruct
+    litellm_params:
+      api_base: "https://AI21-Jamba-Instruct-jpddv.eastus2.models.ai.azure.com"
+      api_key: "WJkvJneEcBMhFqK8zZBaAVw9cl4Ec5Pb"
+      model: azure_ai/jamba-instruct
 
 
 general_settings:
-  alerting: ["slack"]
   alerting_threshold: 10
   master_key: sk-1234
   pass_through_endpoints:
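With the azure-ai-jamba-instruct entry in model_list, the deployment is reachable through the proxy's OpenAI-compatible endpoint under that alias. A minimal sketch, assuming the proxy is running locally on port 4000 (the URL is an assumption; the key is the master_key from general_settings above):

# Hypothetical client call against the litellm proxy using the new alias.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

resp = client.chat.completions.create(
    model="azure-ai-jamba-instruct",
    messages=[{"role": "user", "content": "what llm are you"}],
    max_tokens=15,
)
print(resp.choices[0].message.content)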
@@ -8,6 +8,7 @@ sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 import asyncio
+import os
 import time
 from typing import Optional
 
@@ -785,6 +786,28 @@ def test_vertex_ai_embedding_completion_cost(caplog):
     # assert False
 
 
+def test_completion_azure_ai():
+    try:
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+
+        litellm.set_verbose = True
+        response = litellm.completion(
+            model="azure_ai/Mistral-large-nmefg",
+            messages=[{"content": "what llm are you", "role": "user"}],
+            max_tokens=15,
+            num_retries=3,
+            api_base=os.getenv("AZURE_AI_MISTRAL_API_BASE"),
+            api_key=os.getenv("AZURE_AI_MISTRAL_API_KEY"),
+        )
+        print(response)
+
+        assert "response_cost" in response._hidden_params
+        assert isinstance(response._hidden_params["response_cost"], float)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
 async def test_completion_cost_hidden_params(sync_mode):
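The response_cost hidden param asserted above is derived from the same local cost map the test loads. A rough offline sanity check of that relationship (a sketch, not part of the test suite; token counts are hypothetical):

# Recompute the expected cost for a hypothetical usage against the local map.
import os

import litellm

os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

entry = litellm.model_cost["azure_ai/mistral-large"]
prompt_tokens, completion_tokens = 12, 15  # hypothetical
expected_cost = (
    prompt_tokens * entry["input_cost_per_token"]
    + completion_tokens * entry["output_cost_per_token"]
)
print(expected_cost)  # 12 * 0.000004 + 15 * 0.000012 = 0.000228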
@@ -5573,8 +5573,16 @@ def convert_to_model_response_object(
                     "system_fingerprint"
                 ]
 
-        if "model" in response_object and model_response_object.model is None:
-            model_response_object.model = response_object["model"]
+        if "model" in response_object:
+            if model_response_object.model is None:
+                model_response_object.model = response_object["model"]
+            elif "/" in model_response_object.model:
+                openai_compatible_provider = model_response_object.model.split("/")[
+                    0
+                ]
+                model_response_object.model = (
+                    openai_compatible_provider + "/" + response_object["model"]
+                )
 
         if start_time is not None and end_time is not None:
             if isinstance(start_time, type(end_time)):
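The new branch re-attaches the provider prefix when the caller addressed the model as "provider/model" but the upstream response reports a bare model name, so downstream cost lookups can still hit the "azure_ai/..." keys. A standalone sketch of that logic (function and example names are illustrative, not from the diff):

def resolve_model_name(current_model, returned_model):
    # current_model: what the caller requested (may carry a "provider/" prefix)
    # returned_model: the "model" field from the upstream response, if any
    if returned_model is None:
        return current_model
    if current_model is None:
        return returned_model
    if "/" in current_model:
        provider = current_model.split("/")[0]
        return provider + "/" + returned_model
    return current_model

# e.g. resolve_model_name("azure_ai/Mistral-large-nmefg", "mistral-large")
# -> "azure_ai/mistral-large", which matches the pricing key added in this commit.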