* LiteLLM Minor Fixes & Improvements (09/23/2024) (#5842)
  * feat(auth_utils.py): enable admin to allow client-side credentials to be passed. Makes it easier for devs to experiment with finetuned fireworks ai models
  * feat(router.py): allow setting configurable_clientside_auth_params for a model. Closes https://github.com/BerriAI/litellm/issues/5843
  * build(model_prices_and_context_window.json): fix anthropic claude-3-5-sonnet max output token limit. Fixes https://github.com/BerriAI/litellm/issues/5850
  * fix(azure_ai/): support content list for azure ai. Fixes https://github.com/BerriAI/litellm/issues/4237
  * fix(litellm_logging.py): always set saved_cache_cost; set to 0 by default
  * fix(fireworks_ai/cost_calculator.py): add fireworks ai default pricing; handles calling 405b+ size models
  * fix(slack_alerting.py): fix error alerting for failed spend tracking. Fixes regression with slack alerting error monitoring
  * fix(vertex_and_google_ai_studio_gemini.py): handle gemini "no candidates in streaming chunk" error
  * docs(bedrock.md): add llama3-1 models
  * test: fix tests
  * fix(azure_ai/chat): fix transformation for azure ai calls
78 lines
2.5 KiB
Python
"""
|
|
For calculating cost of fireworks ai serverless inference models.
|
|
"""
|
|
|
|
from typing import Tuple
|
|
|
|
from litellm.types.utils import Usage
|
|
from litellm.utils import get_model_info
|
|
|
|
|
|
# Extract the number of billion parameters from the model name
# and map it to a fireworks ai pricing category
def get_base_model_for_pricing(model_name: str) -> str:
    """
    Helper function for calculating fireworks ai pricing.

    Returns:
    - str: the model pricing category if the parameter count is mapped, else "fireworks-ai-default"
    """
    import re

    model_name = model_name.lower()

    # Check for MoE models in the form <number>x<number>b
    moe_match = re.search(r"(\d+)x(\d+)b", model_name)
    if moe_match:
        total_billion = int(moe_match.group(1)) * int(moe_match.group(2))
        if total_billion <= 56:
            return "fireworks-ai-moe-up-to-56b"
        elif total_billion <= 176:
            return "fireworks-ai-56b-to-176b"

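    # Illustrative MoE mapping (assumed example name, not taken from this file):
    #   "mixtral-8x7b" -> 8 x 7 = 56 total billion params -> "fireworks-ai-moe-up-to-56b"
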
    # Check for standard models in the form <number>b
    re_params_match = re.search(r"(\d+)b", model_name)
    if re_params_match is not None:
        params_billion = float(re_params_match.group(1))

        # Determine the category based on the number of parameters
        if params_billion <= 16.0:
            return "fireworks-ai-up-to-16b"
        elif params_billion <= 80.0:
            return "fireworks-ai-16b-80b"

    # If no matches, fall back to the default pricing category
    return "fireworks-ai-default"


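# Quick sanity check of the mapping above (model names are illustrative
# examples, not an exhaustive list of fireworks ai models):
#   get_base_model_for_pricing("llama-v3p1-8b-instruct")   -> "fireworks-ai-up-to-16b"
#   get_base_model_for_pricing("llama-v3p1-70b-instruct")  -> "fireworks-ai-16b-80b"
#   get_base_model_for_pricing("llama-v3p1-405b-instruct") -> "fireworks-ai-default"
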
def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.

    Input:
    - model: str, the model name without provider prefix
    - usage: LiteLLM Usage block, containing the prompt and completion token counts

    Returns:
    Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    ## check if model mapped, else use default pricing
    try:
        model_info = get_model_info(model=model, custom_llm_provider="fireworks_ai")
    except Exception:
        base_model = get_base_model_for_pricing(model_name=model)

        ## GET MODEL INFO
        model_info = get_model_info(
            model=base_model, custom_llm_provider="fireworks_ai"
        )
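
    # At this point model_info carries input_cost_per_token and
    # output_cost_per_token for either the exact model or its
    # size-based pricing category.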

    ## CALCULATE INPUT COST
    prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]

    ## CALCULATE OUTPUT COST
    completion_cost = usage["completion_tokens"] * model_info["output_cost_per_token"]

    return prompt_cost, completion_cost
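
# Sketch of expected usage (token counts and the $3 / 1M-token price are
# illustrative assumptions, not real fireworks ai rates; actual prices come
# from litellm's model cost map via get_model_info):
#   usage = Usage(prompt_tokens=1_000, completion_tokens=500, total_tokens=1_500)
#   prompt_cost, completion_cost = cost_per_token("llama-v3p1-405b-instruct", usage)
#   # prompt_cost     == 1_000 * 0.000003 == 0.003
#   # completion_cost ==   500 * 0.000003 == 0.0015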