Allow assigning teams to org on UI + OpenAI omni-moderation cost model tracking (#7566)

* feat(cost_calculator.py): add cost tracking ($0) for openai moderations endpoint

removes the Sentry cost-tracking errors caused by this endpoint having no cost entry
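
A minimal sketch of the behavior this enables, mirroring the test added below (assumes a valid OPENAI_API_KEY, since moderation() makes a real API call):

import os
import litellm
from litellm import completion_cost, moderation

# load the cost map bundled with the package so the new omni-moderation entries are present
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
litellm.add_known_models()

response = moderation("I am a bad person", model="omni-moderation-latest")
cost = completion_cost(response, model="omni-moderation-latest")
assert cost == 0  # moderation is billed at $0, so cost tracking no longer errors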

* build(teams.tsx): allow assigning teams to orgs
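
The diff below adds an optional "Organization ID" field to the Create Team form. A hedged sketch of the equivalent call against the proxy's management API (assumes a proxy at localhost:4000 with master key sk-1234, and that /team/new accepts an organization_id field; the endpoint itself is not part of this diff):

import requests

# hypothetical values: adjust the proxy URL, key, and org id to your deployment
resp = requests.post(
    "http://localhost:4000/team/new",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "team_alias": "ml-platform",
        "organization_id": "org-abc123",  # found in the 'Organization' tab
    },
)
print(resp.json())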
Krish Dholakia authored on 2025-01-08 16:58:21 -08:00 (committed by GitHub)
parent b769b826d0
commit 12a78fe05f
5 changed files with 227 additions and 141 deletions


@@ -545,9 +545,12 @@ def completion_cost(  # noqa: PLR0915
         isinstance(completion_response, BaseModel)
         or isinstance(completion_response, dict)
     ):  # tts returns a custom class
-        usage_obj: Optional[Union[dict, Usage]] = completion_response.get(  # type: ignore
-            "usage", {}
-        )
+        if isinstance(completion_response, dict):
+            usage_obj: Optional[Union[dict, Usage]] = completion_response.get(
+                "usage", {}
+            )
+        else:
+            usage_obj = getattr(completion_response, "usage", {})
         if isinstance(usage_obj, BaseModel) and not isinstance(
             usage_obj, litellm.Usage
         ):
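
Context for the hunk above: OpenAI moderation responses are Pydantic models with no usage field, so the old unconditional .get() call (note the removed "# type: ignore") could raise on them. A minimal sketch of the failure mode and the fallback, using a hypothetical stand-in for the response type:

from typing import Union
from pydantic import BaseModel

class ModerationResult(BaseModel):
    # hypothetical stand-in for the OpenAI moderation response,
    # which carries no `usage` field (unlike chat/embedding responses)
    id: str
    model: str

resp: Union[dict, ModerationResult] = ModerationResult(
    id="modr-123", model="omni-moderation-latest"
)

# resp.get("usage", {})  # AttributeError: pydantic models have no .get()
if isinstance(resp, dict):
    usage = resp.get("usage", {})
else:
    usage = getattr(resp, "usage", {})  # safe fallback -> {}

print(usage)  # {}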


@@ -1,6 +1,6 @@
 {
     "sample_spec": {
-        "max_tokens": "set to max_output_tokens if provider specifies it. IF not set to max_tokens provider specifies",
+        "max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.",
         "max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens",
         "max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens",
         "input_cost_per_token": 0.0000,
@@ -16,75 +16,23 @@
         "supports_response_schema": true,
         "supports_system_messages": true
     },
-    "sambanova/Meta-Llama-3.1-8B-Instruct": {
-        "max_tokens": 16000,
-        "max_input_tokens": 16000,
-        "max_output_tokens": 16000,
-        "input_cost_per_token": 0.0000001,
-        "output_cost_per_token": 0.0000002,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Meta-Llama-3.1-70B-Instruct": {
-        "max_tokens": 128000,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 128000,
-        "input_cost_per_token": 0.0000006,
-        "output_cost_per_token": 0.0000012,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Meta-Llama-3.1-405B-Instruct": {
-        "max_tokens": 16000,
-        "max_input_tokens": 16000,
-        "max_output_tokens": 16000,
-        "input_cost_per_token": 0.000005,
-        "output_cost_per_token": 0.000010,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Meta-Llama-3.2-1B-Instruct": {
-        "max_tokens": 16000,
-        "max_input_tokens": 16000,
-        "max_output_tokens": 16000,
-        "input_cost_per_token": 0.0000004,
-        "output_cost_per_token": 0.0000008,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Meta-Llama-3.2-3B-Instruct": {
-        "max_tokens": 4000,
-        "max_input_tokens": 4000,
-        "max_output_tokens": 4000,
-        "input_cost_per_token": 0.0000008,
-        "output_cost_per_token": 0.0000016,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Qwen2.5-Coder-32B-Instruct": {
-        "max_tokens": 8000,
-        "max_input_tokens": 8000,
-        "max_output_tokens": 8000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000003,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Qwen2.5-72B-Instruct": {
-        "max_tokens": 8000,
-        "max_input_tokens": 8000,
-        "max_output_tokens": 8000,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000004,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
+    "omni-moderation-latest": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 0,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "moderation"
+    },
+    "omni-moderation-2024-09-26": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 0,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "moderation"
+    },
     "gpt-4": {
         "max_tokens": 4096,
@@ -7939,5 +7887,75 @@
         "mode": "embedding",
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
-    }
+    },
+    "sambanova/Meta-Llama-3.1-8B-Instruct": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000002,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Meta-Llama-3.1-70B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.0000006,
+        "output_cost_per_token": 0.0000012,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Meta-Llama-3.1-405B-Instruct": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.000005,
+        "output_cost_per_token": 0.000010,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Meta-Llama-3.2-1B-Instruct": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000004,
+        "output_cost_per_token": 0.0000008,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Meta-Llama-3.2-3B-Instruct": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "max_output_tokens": 4000,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.0000016,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Qwen2.5-Coder-32B-Instruct": {
+        "max_tokens": 8000,
+        "max_input_tokens": 8000,
+        "max_output_tokens": 8000,
+        "input_cost_per_token": 0.0000015,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Qwen2.5-72B-Instruct": {
+        "max_tokens": 8000,
+        "max_input_tokens": 8000,
+        "max_output_tokens": 8000,
+        "input_cost_per_token": 0.000002,
+        "output_cost_per_token": 0.000004,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    }
 }
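
With the two omni-moderation entries registered at $0, the cost lookup is a plain dict access against the packaged map; a minimal sketch (mirrors the setup used in the test further below):

import os
import litellm

# use the cost map bundled with the package instead of fetching the hosted one
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

entry = litellm.model_cost["omni-moderation-latest"]
assert entry["mode"] == "moderation"
assert entry["input_cost_per_token"] == 0.0  # moderation requests are free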


The same change, applied to the repo's second copy of the pricing map:

@@ -1,6 +1,6 @@
 {
     "sample_spec": {
-        "max_tokens": "set to max_output_tokens if provider specifies it. IF not set to max_tokens provider specifies",
+        "max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.",
         "max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens",
         "max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens",
         "input_cost_per_token": 0.0000,
@@ -16,75 +16,23 @@
         "supports_response_schema": true,
         "supports_system_messages": true
     },
-    "sambanova/Meta-Llama-3.1-8B-Instruct": {
-        "max_tokens": 16000,
-        "max_input_tokens": 16000,
-        "max_output_tokens": 16000,
-        "input_cost_per_token": 0.0000001,
-        "output_cost_per_token": 0.0000002,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Meta-Llama-3.1-70B-Instruct": {
-        "max_tokens": 128000,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 128000,
-        "input_cost_per_token": 0.0000006,
-        "output_cost_per_token": 0.0000012,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Meta-Llama-3.1-405B-Instruct": {
-        "max_tokens": 16000,
-        "max_input_tokens": 16000,
-        "max_output_tokens": 16000,
-        "input_cost_per_token": 0.000005,
-        "output_cost_per_token": 0.000010,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Meta-Llama-3.2-1B-Instruct": {
-        "max_tokens": 16000,
-        "max_input_tokens": 16000,
-        "max_output_tokens": 16000,
-        "input_cost_per_token": 0.0000004,
-        "output_cost_per_token": 0.0000008,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Meta-Llama-3.2-3B-Instruct": {
-        "max_tokens": 4000,
-        "max_input_tokens": 4000,
-        "max_output_tokens": 4000,
-        "input_cost_per_token": 0.0000008,
-        "output_cost_per_token": 0.0000016,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Qwen2.5-Coder-32B-Instruct": {
-        "max_tokens": 8000,
-        "max_input_tokens": 8000,
-        "max_output_tokens": 8000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000003,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
-    "sambanova/Qwen2.5-72B-Instruct": {
-        "max_tokens": 8000,
-        "max_input_tokens": 8000,
-        "max_output_tokens": 8000,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000004,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat"
-    },
+    "omni-moderation-latest": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 0,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "moderation"
+    },
+    "omni-moderation-2024-09-26": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 0,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "openai",
+        "mode": "moderation"
+    },
     "gpt-4": {
         "max_tokens": 4096,
@@ -7939,5 +7887,75 @@
         "mode": "embedding",
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
-    }
+    },
+    "sambanova/Meta-Llama-3.1-8B-Instruct": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000002,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Meta-Llama-3.1-70B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.0000006,
+        "output_cost_per_token": 0.0000012,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Meta-Llama-3.1-405B-Instruct": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.000005,
+        "output_cost_per_token": 0.000010,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Meta-Llama-3.2-1B-Instruct": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000004,
+        "output_cost_per_token": 0.0000008,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Meta-Llama-3.2-3B-Instruct": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "max_output_tokens": 4000,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.0000016,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Qwen2.5-Coder-32B-Instruct": {
+        "max_tokens": 8000,
+        "max_input_tokens": 8000,
+        "max_output_tokens": 8000,
+        "input_cost_per_token": 0.0000015,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    },
+    "sambanova/Qwen2.5-72B-Instruct": {
+        "max_tokens": 8000,
+        "max_input_tokens": 8000,
+        "max_output_tokens": 8000,
+        "input_cost_per_token": 0.000002,
+        "output_cost_per_token": 0.000004,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat"
+    }
 }


@@ -2704,6 +2704,24 @@ def test_select_model_name_for_cost_calc():
     assert return_model == "azure_ai/mistral-large"
 
 
+def test_moderations():
+    from litellm import moderation
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.add_known_models()
+
+    assert "omni-moderation-latest" in litellm.model_cost
+    print(
+        f"litellm.model_cost['omni-moderation-latest']: {litellm.model_cost['omni-moderation-latest']}"
+    )
+    assert "omni-moderation-latest" in litellm.open_ai_chat_completion_models
+    response = moderation("I am a bad person", model="omni-moderation-latest")
+    cost = completion_cost(response, model="omni-moderation-latest")
+    assert cost == 0
+
+
 def test_cost_calculator_azure_embedding():
     from litellm.cost_calculator import response_cost_calculator
     from litellm.types.utils import EmbeddingResponse, Usage


@@ -35,6 +35,9 @@ import {
   Col,
   Text,
   Grid,
+  Accordion,
+  AccordionHeader,
+  AccordionBody,
 } from "@tremor/react";
 import { CogIcon } from "@heroicons/react/outline";
 
 const isLocal = process.env.NODE_ENV === "development";
@@ -365,6 +368,13 @@ const Team: React.FC<TeamProps> = ({
     if (accessToken != null) {
       const newTeamAlias = formValues?.team_alias;
       const existingTeamAliases = teams?.map((t) => t.team_alias) ?? [];
+      let organizationId = formValues?.organization_id;
+      if (organizationId === "" || typeof organizationId !== 'string') {
+        formValues.organization_id = null;
+      } else {
+        formValues.organization_id = organizationId.trim();
+      }
+
 
       if (existingTeamAliases.includes(newTeamAlias)) {
         throw new Error(
@@ -731,6 +741,25 @@ const Team: React.FC<TeamProps> = ({
               >
                 <InputNumber step={1} width={400} />
               </Form.Item>
+              <Accordion className="mt-20 mb-8">
+                <AccordionHeader>
+                  <b>Additional Settings</b>
+                </AccordionHeader>
+                <AccordionBody>
+                  <Form.Item
+                    label="Organization ID"
+                    name="organization_id"
+                    help="Assign team to an organization. Found in the 'Organization' tab."
+                  >
+                    <TextInput
+                      placeholder=""
+                      onChange={(e) => {
+                        e.target.value = e.target.value.trim();
+                      }}
+                    />
+                  </Form.Item>
+                </AccordionBody>
+              </Accordion>
             </>
             <div style={{ textAlign: "right", marginTop: "10px" }}>
               <Button2 htmlType="submit">Create Team</Button2>