(feat) add azure openai cost tracking for prompt caching (#6077)

* add azure o1 models to model cost map

* add azure o1 cost tracking

* fix azure cost calc

* add get llm provider test
Ishaan Jaff 2024-10-05 15:04:18 +05:30 committed by GitHub
parent 7267852511
commit ab0b536143
4 changed files with 160 additions and 0 deletions
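
For orientation, here is a minimal sketch of the caching-aware cost formula that the new test below asserts, written against the cost-map keys the test reads (input_cost_per_token, output_cost_per_token, cache_read_input_token_cost). It mirrors the expected-cost expression in the test fixture and is illustrative only, not the exact internals of completion_cost.

def cost_with_prompt_caching(usage, model_info) -> float:
    # Cached prompt tokens are billed at the cheaper cache-read rate.
    # In the test fixture below, prompt_tokens already excludes the cached ones.
    cached_tokens = 0
    if usage.prompt_tokens_details is not None:
        cached_tokens = usage.prompt_tokens_details.cached_tokens or 0
    return (
        usage.prompt_tokens * model_info["input_cost_per_token"]
        + usage.completion_tokens * model_info["output_cost_per_token"]
        + cached_tokens * model_info["cache_read_input_token_cost"]
    )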

@@ -1295,6 +1295,93 @@ def test_completion_cost_fireworks_ai(model):
    cost = completion_cost(completion_response=resp)


def test_cost_azure_openai_prompt_caching():
    from litellm.utils import Choices, Message, ModelResponse, Usage
    from litellm.types.utils import PromptTokensDetails, CompletionTokensDetails
    from litellm import get_model_info

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    model = "azure/o1-mini"

    ## LLM API CALL ## (MORE EXPENSIVE)
    response_1 = ModelResponse(
        id="chatcmpl-3f427194-0840-4d08-b571-56bfe38a5424",
        choices=[
            Choices(
                finish_reason="length",
                index=0,
                message=Message(
                    content="Hello! I'm doing well, thank you for",
                    role="assistant",
                    tool_calls=None,
                    function_call=None,
                ),
            )
        ],
        created=1725036547,
        model=model,
        object="chat.completion",
        system_fingerprint=None,
        usage=Usage(
            completion_tokens=10,
            prompt_tokens=14,
            total_tokens=24,
            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=2),
        ),
    )

    ## PROMPT CACHE HIT ## (LESS EXPENSIVE)
    response_2 = ModelResponse(
        id="chatcmpl-3f427194-0840-4d08-b571-56bfe38a5424",
        choices=[
            Choices(
                finish_reason="length",
                index=0,
                message=Message(
                    content="Hello! I'm doing well, thank you for",
                    role="assistant",
                    tool_calls=None,
                    function_call=None,
                ),
            )
        ],
        created=1725036547,
        model=model,
        object="chat.completion",
        system_fingerprint=None,
        usage=Usage(
            completion_tokens=10,
            prompt_tokens=0,
            total_tokens=10,
            prompt_tokens_details=PromptTokensDetails(
                cached_tokens=14,
            ),
            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=2),
        ),
    )

    cost_1 = completion_cost(model=model, completion_response=response_1)
    cost_2 = completion_cost(model=model, completion_response=response_2)
    assert cost_1 > cost_2

    model_info = get_model_info(model=model, custom_llm_provider="azure")
    usage = response_2.usage

    _expected_cost2 = (
        usage.prompt_tokens * model_info["input_cost_per_token"]
        + usage.completion_tokens * model_info["output_cost_per_token"]
        + usage.prompt_tokens_details.cached_tokens
        * model_info["cache_read_input_token_cost"]
    )

    print("_expected_cost2", _expected_cost2)
    print("cost_2", cost_2)

    assert cost_2 == _expected_cost2


def test_completion_cost_vertex_llama3():
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")
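
The first commit bullet adds azure o1 models to the model cost map. As an illustration only, an entry carrying the keys the test above reads might look like the sketch below; the numbers are placeholders, not Azure's actual rates.

example_cost_map_entry = {
    "azure/o1-mini": {
        "input_cost_per_token": 3e-06,           # placeholder rate
        "output_cost_per_token": 1.2e-05,        # placeholder rate
        "cache_read_input_token_cost": 1.5e-06,  # placeholder rate
        "litellm_provider": "azure",
        "mode": "chat",
    }
}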

@@ -115,3 +115,12 @@ def test_get_llm_provider_cohere_chat_test2():
    print("api_base=", api_base)
    assert custom_llm_provider == "cohere_chat"
    assert model == "command-r-plus"


def test_get_llm_provider_azure_o1():
    model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider(
        model="azure/o1-mini",
    )
    assert custom_llm_provider == "azure"
    assert model == "o1-mini"
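
Tying the two tests together, a hedged usage sketch: resolve the provider from the azure/ prefix with get_llm_provider, then look up the same cost-map fields the caching-aware cost calculation relies on. It assumes the local cost map is loaded the same way the tests above do.

import os
import litellm
from litellm import get_model_info

# Load the local copy of the model cost map, as the tests above do.
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

# "azure/o1-mini" routes to the azure provider with the prefix stripped.
model, provider, _, _ = litellm.get_llm_provider(model="azure/o1-mini")

# Fetch the cost-map entry used when pricing cached prompt tokens.
model_info = get_model_info(model="azure/o1-mini", custom_llm_provider=provider)
print(provider, model, model_info["cache_read_input_token_cost"])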