Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00.
build(model_prices_and_context_window.json): add azure ai jamba instruct pricing + token details
Adds jamba instruct, mistral, llama3 pricing + token info for azure_ai
This commit is contained in:
parent
caa01d20cb
commit
b1be355d42
5 changed files with 116 additions and 3 deletions
|
@ -698,6 +698,44 @@
|
|||
"litellm_provider": "azure",
|
||||
"mode": "image_generation"
|
||||
},
|
||||
"azure_ai/jamba-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 70000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"output_cost_per_token": 0.0000007,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat"
|
||||
},
|
||||
"azure_ai/mistral-large": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.000004,
|
||||
"output_cost_per_token": 0.000012,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"azure_ai/mistral-small": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000003,
|
||||
"litellm_provider": "azure_ai",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"azure_ai/Meta-Llama-3-70B-Instruct": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.0000011,
|
||||
"output_cost_per_token": 0.00000037,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat"
|
||||
},
|
||||
"babbage-002": {
|
||||
"max_tokens": 16384,
|
||||
"max_input_tokens": 16384,
|
||||
|
|
|
@ -4,15 +4,21 @@ model_list:
|
|||
api_base: os.environ/AZURE_AI_MISTRAL_API_BASE
|
||||
api_key: os.environ/AZURE_AI_MISTRAL_API_KEY
|
||||
model: azure_ai/Mistral-large-nmefg
|
||||
input_cost_per_token: 0.00001
|
||||
output_cost_per_token: 0.000004
|
||||
- model_name: azure-ai-phi
|
||||
litellm_params:
|
||||
api_base: os.environ/AZURE_AI_PHI_API_BASE
|
||||
api_key: os.environ/AZURE_AI_PHI_API_KEY
|
||||
model: azure_ai/Phi-3-medium-128k-instruct-fpmvj
|
||||
- model_name: azure-ai-jamba-instruct
|
||||
litellm_params:
|
||||
api_base: "https://AI21-Jamba-Instruct-jpddv.eastus2.models.ai.azure.com"
|
||||
api_key: os.environ/AZURE_AI_JAMBA_API_KEY # NOTE(review): the original commit hardcoded a live API key on this line; it has been redacted here — the leaked credential must be rotated immediately.
|
||||
model: azure_ai/jamba-instruct
|
||||
|
||||
|
||||
general_settings:
|
||||
alerting: ["slack"]
|
||||
alerting_threshold: 10
|
||||
master_key: sk-1234
|
||||
pass_through_endpoints:
|
||||
|
|
|
@ -8,6 +8,7 @@ sys.path.insert(
|
|||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import asyncio
|
||||
import os
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
|
@ -785,6 +786,28 @@ def test_vertex_ai_embedding_completion_cost(caplog):
|
|||
# assert False
|
||||
|
||||
|
||||
def test_completion_azure_ai():
    """Smoke-test cost tracking for an azure_ai chat completion.

    Requires AZURE_AI_MISTRAL_API_BASE / AZURE_AI_MISTRAL_API_KEY in the
    environment; any exception is surfaced as a pytest failure.
    """
    try:
        # Force the locally bundled price map so the azure_ai entries added
        # in this commit are guaranteed to be loaded.
        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
        litellm.model_cost = litellm.get_model_cost_map(url="")

        litellm.set_verbose = True
        completion_kwargs = {
            "model": "azure_ai/Mistral-large-nmefg",
            "messages": [{"content": "what llm are you", "role": "user"}],
            "max_tokens": 15,
            "num_retries": 3,
            "api_base": os.getenv("AZURE_AI_MISTRAL_API_BASE"),
            "api_key": os.getenv("AZURE_AI_MISTRAL_API_KEY"),
        }
        response = litellm.completion(**completion_kwargs)
        print(response)

        # Cost tracking should attach a float response_cost to hidden params.
        hidden = response._hidden_params
        assert "response_cost" in hidden
        assert isinstance(hidden["response_cost"], float)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_completion_cost_hidden_params(sync_mode):
|
||||
|
|
|
@ -5573,8 +5573,16 @@ def convert_to_model_response_object(
|
|||
"system_fingerprint"
|
||||
]
|
||||
|
||||
if "model" in response_object and model_response_object.model is None:
|
||||
model_response_object.model = response_object["model"]
|
||||
if "model" in response_object:
|
||||
if model_response_object.model is None:
|
||||
model_response_object.model = response_object["model"]
|
||||
elif "/" in model_response_object.model:
|
||||
openai_compatible_provider = model_response_object.model.split("/")[
|
||||
0
|
||||
]
|
||||
model_response_object.model = (
|
||||
openai_compatible_provider + "/" + response_object["model"]
|
||||
)
|
||||
|
||||
if start_time is not None and end_time is not None:
|
||||
if isinstance(start_time, type(end_time)):
|
||||
|
|
|
@ -698,6 +698,44 @@
|
|||
"litellm_provider": "azure",
|
||||
"mode": "image_generation"
|
||||
},
|
||||
"azure_ai/jamba-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 70000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"output_cost_per_token": 0.0000007,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat"
|
||||
},
|
||||
"azure_ai/mistral-large": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.000004,
|
||||
"output_cost_per_token": 0.000012,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true
|
||||
},
|
||||
"azure_ai/mistral-small": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8191,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000003,
|
||||
"litellm_provider": "azure_ai",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"azure_ai/Meta-Llama-3-70B-Instruct": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.0000011,
|
||||
"output_cost_per_token": 0.00000037,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat"
|
||||
},
|
||||
"babbage-002": {
|
||||
"max_tokens": 16384,
|
||||
"max_input_tokens": 16384,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue