(feat) add azure openai cost tracking for prompt caching (#6077)

* add azure o1 models to model cost map

* add azure o1 cost tracking

* fix azure cost calc

* add get llm provider test
Ishaan Jaff 2024-10-05 15:04:18 +05:30 committed by GitHub
parent 7267852511
commit ab0b536143
4 changed files with 160 additions and 0 deletions
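
For orientation, here is a minimal sketch of the caching-aware cost formula that the new test below asserts, written against the cost-map keys the test reads (input_cost_per_token, output_cost_per_token, cache_read_input_token_cost). It mirrors the expected-cost expression in the test fixture and is illustrative only, not the exact internals of completion_cost.

def cost_with_prompt_caching(usage, model_info) -> float:
    # Cached prompt tokens are billed at the cheaper cache-read rate.
    # In the test fixture below, prompt_tokens already excludes the cached ones.
    cached_tokens = 0
    if usage.prompt_tokens_details is not None:
        cached_tokens = usage.prompt_tokens_details.cached_tokens or 0
    return (
        usage.prompt_tokens * model_info["input_cost_per_token"]
        + usage.completion_tokens * model_info["output_cost_per_token"]
        + cached_tokens * model_info["cache_read_input_token_cost"]
    )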

@@ -1295,6 +1295,93 @@ def test_completion_cost_fireworks_ai(model):
    cost = completion_cost(completion_response=resp)


def test_cost_azure_openai_prompt_caching():
    from litellm.utils import Choices, Message, ModelResponse, Usage
    from litellm.types.utils import PromptTokensDetails, CompletionTokensDetails
    from litellm import get_model_info

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    model = "azure/o1-mini"

    ## LLM API CALL ## (MORE EXPENSIVE)
    response_1 = ModelResponse(
        id="chatcmpl-3f427194-0840-4d08-b571-56bfe38a5424",
        choices=[
            Choices(
                finish_reason="length",
                index=0,
                message=Message(
                    content="Hello! I'm doing well, thank you for",
                    role="assistant",
                    tool_calls=None,
                    function_call=None,
                ),
            )
        ],
        created=1725036547,
        model=model,
        object="chat.completion",
        system_fingerprint=None,
        usage=Usage(
            completion_tokens=10,
            prompt_tokens=14,
            total_tokens=24,
            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=2),
        ),
    )

    ## PROMPT CACHE HIT ## (LESS EXPENSIVE)
    response_2 = ModelResponse(
        id="chatcmpl-3f427194-0840-4d08-b571-56bfe38a5424",
        choices=[
            Choices(
                finish_reason="length",
                index=0,
                message=Message(
                    content="Hello! I'm doing well, thank you for",
                    role="assistant",
                    tool_calls=None,
                    function_call=None,
                ),
            )
        ],
        created=1725036547,
        model=model,
        object="chat.completion",
        system_fingerprint=None,
        usage=Usage(
            completion_tokens=10,
            prompt_tokens=0,
            total_tokens=10,
            prompt_tokens_details=PromptTokensDetails(
                cached_tokens=14,
            ),
            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=2),
        ),
    )

    cost_1 = completion_cost(model=model, completion_response=response_1)
    cost_2 = completion_cost(model=model, completion_response=response_2)
    assert cost_1 > cost_2

    model_info = get_model_info(model=model, custom_llm_provider="azure")
    usage = response_2.usage

    _expected_cost2 = (
        usage.prompt_tokens * model_info["input_cost_per_token"]
        + usage.completion_tokens * model_info["output_cost_per_token"]
        + usage.prompt_tokens_details.cached_tokens
        * model_info["cache_read_input_token_cost"]
    )

    print("_expected_cost2", _expected_cost2)
    print("cost_2", cost_2)

    assert cost_2 == _expected_cost2


def test_completion_cost_vertex_llama3():
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")
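
The first commit bullet adds azure o1 models to the model cost map. As an illustration only, an entry carrying the keys the test above reads might look like the sketch below; the numbers are placeholders, not Azure's actual rates.

example_cost_map_entry = {
    "azure/o1-mini": {
        "input_cost_per_token": 3e-06,           # placeholder rate
        "output_cost_per_token": 1.2e-05,        # placeholder rate
        "cache_read_input_token_cost": 1.5e-06,  # placeholder rate
        "litellm_provider": "azure",
        "mode": "chat",
    }
}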

@@ -115,3 +115,12 @@ def test_get_llm_provider_cohere_chat_test2():
    print("api_base=", api_base)
    assert custom_llm_provider == "cohere_chat"
    assert model == "command-r-plus"


def test_get_llm_provider_azure_o1():
    model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider(
        model="azure/o1-mini",
    )
    assert custom_llm_provider == "azure"
    assert model == "o1-mini"
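
Tying the two tests together, a hedged usage sketch: resolve the provider from the azure/ prefix with get_llm_provider, then look up the same cost-map fields the caching-aware cost calculation relies on. It assumes the local cost map is loaded the same way the tests above do.

import os
import litellm
from litellm import get_model_info

# Load the local copy of the model cost map, as the tests above do.
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

# "azure/o1-mini" routes to the azure provider with the prefix stripped.
model, provider, _, _ = litellm.get_llm_provider(model="azure/o1-mini")

# Fetch the cost-map entry used when pricing cached prompt tokens.
model_info = get_model_info(model="azure/o1-mini", custom_llm_provider=provider)
print(provider, model, model_info["cache_read_input_token_cost"])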