mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
Allow assigning teams to org on UI + OpenAI omni-moderation cost model tracking (#7566)
* feat(cost_calculator.py): add cost tracking ($0) for the OpenAI moderations endpoint; removes the Sentry cost-tracking errors this endpoint was triggering
* build(teams.tsx): allow assigning teams to orgs
This commit is contained in:
parent
b769b826d0
commit
12a78fe05f
5 changed files with 227 additions and 141 deletions
|
@ -545,9 +545,12 @@ def completion_cost( # noqa: PLR0915
|
|||
isinstance(completion_response, BaseModel)
|
||||
or isinstance(completion_response, dict)
|
||||
): # tts returns a custom class
|
||||
usage_obj: Optional[Union[dict, Usage]] = completion_response.get( # type: ignore
|
||||
if isinstance(completion_response, dict):
|
||||
usage_obj: Optional[Union[dict, Usage]] = completion_response.get(
|
||||
"usage", {}
|
||||
)
|
||||
else:
|
||||
usage_obj = getattr(completion_response, "usage", {})
|
||||
if isinstance(usage_obj, BaseModel) and not isinstance(
|
||||
usage_obj, litellm.Usage
|
||||
):
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"sample_spec": {
|
||||
"max_tokens": "set to max_output_tokens if provider specifies it. IF not set to max_tokens provider specifies",
|
||||
"max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.",
|
||||
"max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens",
|
||||
"max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens",
|
||||
"input_cost_per_token": 0.0000,
|
||||
|
@ -16,75 +16,23 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_system_messages": true
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-8B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000002,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
"omni-moderation-latest": {
|
||||
"max_tokens": 32768,
|
||||
"max_input_tokens": 32768,
|
||||
"max_output_tokens": 0,
|
||||
"input_cost_per_token": 0.0,
|
||||
"output_cost_per_token": 0.0,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "moderation"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-70B-Instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 128000,
|
||||
"input_cost_per_token": 0.0000006,
|
||||
"output_cost_per_token": 0.0000012,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-405B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.000005,
|
||||
"output_cost_per_token": 0.000010,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.2-1B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.0000004,
|
||||
"output_cost_per_token": 0.0000008,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.2-3B-Instruct": {
|
||||
"max_tokens": 4000,
|
||||
"max_input_tokens": 4000,
|
||||
"max_output_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.0000016,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Qwen2.5-Coder-32B-Instruct": {
|
||||
"max_tokens": 8000,
|
||||
"max_input_tokens": 8000,
|
||||
"max_output_tokens": 8000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000003,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Qwen2.5-72B-Instruct": {
|
||||
"max_tokens": 8000,
|
||||
"max_input_tokens": 8000,
|
||||
"max_output_tokens": 8000,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000004,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
"omni-moderation-2024-09-26": {
|
||||
"max_tokens": 32768,
|
||||
"max_input_tokens": 32768,
|
||||
"max_output_tokens": 0,
|
||||
"input_cost_per_token": 0.0,
|
||||
"output_cost_per_token": 0.0,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "moderation"
|
||||
},
|
||||
"gpt-4": {
|
||||
"max_tokens": 4096,
|
||||
|
@ -7939,5 +7887,75 @@
|
|||
"mode": "embedding",
|
||||
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
|
||||
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-8B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000002,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-70B-Instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 128000,
|
||||
"input_cost_per_token": 0.0000006,
|
||||
"output_cost_per_token": 0.0000012,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-405B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.000005,
|
||||
"output_cost_per_token": 0.000010,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.2-1B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.0000004,
|
||||
"output_cost_per_token": 0.0000008,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.2-3B-Instruct": {
|
||||
"max_tokens": 4000,
|
||||
"max_input_tokens": 4000,
|
||||
"max_output_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.0000016,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Qwen2.5-Coder-32B-Instruct": {
|
||||
"max_tokens": 8000,
|
||||
"max_input_tokens": 8000,
|
||||
"max_output_tokens": 8000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000003,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Qwen2.5-72B-Instruct": {
|
||||
"max_tokens": 8000,
|
||||
"max_input_tokens": 8000,
|
||||
"max_output_tokens": 8000,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000004,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"sample_spec": {
|
||||
"max_tokens": "set to max_output_tokens if provider specifies it. IF not set to max_tokens provider specifies",
|
||||
"max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.",
|
||||
"max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens",
|
||||
"max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens",
|
||||
"input_cost_per_token": 0.0000,
|
||||
|
@ -16,75 +16,23 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_system_messages": true
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-8B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000002,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
"omni-moderation-latest": {
|
||||
"max_tokens": 32768,
|
||||
"max_input_tokens": 32768,
|
||||
"max_output_tokens": 0,
|
||||
"input_cost_per_token": 0.0,
|
||||
"output_cost_per_token": 0.0,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "moderation"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-70B-Instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 128000,
|
||||
"input_cost_per_token": 0.0000006,
|
||||
"output_cost_per_token": 0.0000012,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-405B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.000005,
|
||||
"output_cost_per_token": 0.000010,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.2-1B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.0000004,
|
||||
"output_cost_per_token": 0.0000008,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.2-3B-Instruct": {
|
||||
"max_tokens": 4000,
|
||||
"max_input_tokens": 4000,
|
||||
"max_output_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.0000016,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Qwen2.5-Coder-32B-Instruct": {
|
||||
"max_tokens": 8000,
|
||||
"max_input_tokens": 8000,
|
||||
"max_output_tokens": 8000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000003,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Qwen2.5-72B-Instruct": {
|
||||
"max_tokens": 8000,
|
||||
"max_input_tokens": 8000,
|
||||
"max_output_tokens": 8000,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000004,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
"omni-moderation-2024-09-26": {
|
||||
"max_tokens": 32768,
|
||||
"max_input_tokens": 32768,
|
||||
"max_output_tokens": 0,
|
||||
"input_cost_per_token": 0.0,
|
||||
"output_cost_per_token": 0.0,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "moderation"
|
||||
},
|
||||
"gpt-4": {
|
||||
"max_tokens": 4096,
|
||||
|
@ -7939,5 +7887,75 @@
|
|||
"mode": "embedding",
|
||||
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
|
||||
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-8B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000002,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-70B-Instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 128000,
|
||||
"input_cost_per_token": 0.0000006,
|
||||
"output_cost_per_token": 0.0000012,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.1-405B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.000005,
|
||||
"output_cost_per_token": 0.000010,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.2-1B-Instruct": {
|
||||
"max_tokens": 16000,
|
||||
"max_input_tokens": 16000,
|
||||
"max_output_tokens": 16000,
|
||||
"input_cost_per_token": 0.0000004,
|
||||
"output_cost_per_token": 0.0000008,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Meta-Llama-3.2-3B-Instruct": {
|
||||
"max_tokens": 4000,
|
||||
"max_input_tokens": 4000,
|
||||
"max_output_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.0000016,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Qwen2.5-Coder-32B-Instruct": {
|
||||
"max_tokens": 8000,
|
||||
"max_input_tokens": 8000,
|
||||
"max_output_tokens": 8000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000003,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"sambanova/Qwen2.5-72B-Instruct": {
|
||||
"max_tokens": 8000,
|
||||
"max_input_tokens": 8000,
|
||||
"max_output_tokens": 8000,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000004,
|
||||
"litellm_provider": "sambanova",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2704,6 +2704,24 @@ def test_select_model_name_for_cost_calc():
|
|||
assert return_model == "azure_ai/mistral-large"
|
||||
|
||||
|
||||
|
||||
def test_moderations():
    """Verify that an OpenAI omni-moderation call is costed at zero.

    Reloads ``litellm.model_cost`` via ``get_model_cost_map(url="")`` with
    ``LITELLM_LOCAL_MODEL_COST_MAP`` set, checks that
    ``omni-moderation-latest`` is registered, then sends one moderation
    request and asserts ``completion_cost`` reports 0 for its response.
    """
    from litellm import moderation

    moderation_model = "omni-moderation-latest"

    # NOTE(review): presumably this flag makes get_model_cost_map() use the
    # bundled cost map instead of fetching one -- confirm against litellm.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")
    litellm.add_known_models()

    # The model must be present in the cost map ...
    assert moderation_model in litellm.model_cost
    cost_entry = litellm.model_cost[moderation_model]
    print(f"litellm.model_cost['omni-moderation-latest']: {cost_entry}")
    # ... and in the list populated by add_known_models().
    assert moderation_model in litellm.open_ai_chat_completion_models

    response = moderation("I am a bad person", model=moderation_model)
    assert completion_cost(response, model=moderation_model) == 0
|
||||
|
||||
def test_cost_calculator_azure_embedding():
|
||||
from litellm.cost_calculator import response_cost_calculator
|
||||
from litellm.types.utils import EmbeddingResponse, Usage
|
||||
|
|
|
@ -35,6 +35,9 @@ import {
|
|||
Col,
|
||||
Text,
|
||||
Grid,
|
||||
Accordion,
|
||||
AccordionHeader,
|
||||
AccordionBody,
|
||||
} from "@tremor/react";
|
||||
import { CogIcon } from "@heroicons/react/outline";
|
||||
const isLocal = process.env.NODE_ENV === "development";
|
||||
|
@ -365,6 +368,13 @@ const Team: React.FC<TeamProps> = ({
|
|||
if (accessToken != null) {
|
||||
const newTeamAlias = formValues?.team_alias;
|
||||
const existingTeamAliases = teams?.map((t) => t.team_alias) ?? [];
|
||||
let organizationId = formValues?.organization_id;
|
||||
if (organizationId === "" || typeof organizationId !== 'string') {
|
||||
formValues.organization_id = null;
|
||||
} else {
|
||||
formValues.organization_id = organizationId.trim();
|
||||
}
|
||||
|
||||
|
||||
if (existingTeamAliases.includes(newTeamAlias)) {
|
||||
throw new Error(
|
||||
|
@ -731,6 +741,25 @@ const Team: React.FC<TeamProps> = ({
|
|||
>
|
||||
<InputNumber step={1} width={400} />
|
||||
</Form.Item>
|
||||
<Accordion className="mt-20 mb-8">
|
||||
<AccordionHeader>
|
||||
<b>Additional Settings</b>
|
||||
</AccordionHeader>
|
||||
<AccordionBody>
|
||||
<Form.Item
|
||||
label="Organization ID"
|
||||
name="organization_id"
|
||||
help="Assign team to an organization. Found in the 'Organization' tab."
|
||||
>
|
||||
<TextInput
|
||||
placeholder=""
|
||||
onChange={(e) => {
|
||||
e.target.value = e.target.value.trim();
|
||||
}}
|
||||
/>
|
||||
</Form.Item>
|
||||
</AccordionBody>
|
||||
</Accordion>
|
||||
</>
|
||||
<div style={{ textAlign: "right", marginTop: "10px" }}>
|
||||
<Button2 htmlType="submit">Create Team</Button2>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue