forked from phoenix/litellm-mirror
Add pyright to ci/cd + Fix remaining type-checking errors (#6082)
* fix: fix type-checking errors
* fix: fix additional type-checking errors
* fix: additional type-checking error fixes
* fix: fix additional type-checking errors
* fix: additional type-check fixes
* fix: fix all type-checking errors + add pyright to ci/cd
* fix: fix incorrect import
* ci(config.yml): use mypy on ci/cd
* fix: fix type-checking errors in utils.py
* fix: fix all type-checking errors on main.py
* fix: fix mypy linting errors
* fix(anthropic/cost_calculator.py): fix linting errors
* fix: fix mypy linting errors
* fix: fix linting errors
This commit is contained in:
parent f7ce1173f3
commit fac3b2ee42

65 changed files with 619 additions and 522 deletions
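The squashed messages above are individual passes over pyright and mypy findings. As a rough, hypothetical sketch of the kind of change such fixes usually involve (the function and names below are illustrative and do not appear in this diff), a typical implicit-Optional repair looks like this:

```python
from typing import Optional


# Hypothetical example: pyright rejects `alert_type: str = None` because the
# None default makes the parameter implicitly Optional. Spelling out Optional
# and narrowing it before use satisfies the checker without changing behavior.
def format_alert(alert_type: Optional[str] = None) -> str:
    if alert_type is None:
        return "alert: unspecified"
    return f"alert: {alert_type}"
```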
@@ -14,7 +14,7 @@ from typing import Optional

 import httpx

-from litellm.integrations.SlackAlerting.types import AlertType
+from litellm.types.integrations.slack_alerting import AlertType

 # import logging
 # logging.basicConfig(level=logging.DEBUG)
@@ -1188,13 +1188,36 @@ def test_completion_cost_anthropic_prompt_caching():
         system_fingerprint=None,
         usage=Usage(
             completion_tokens=10,
-            prompt_tokens=14,
-            total_tokens=24,
+            prompt_tokens=114,
+            total_tokens=124,
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=0),
             cache_creation_input_tokens=100,
             cache_read_input_tokens=0,
         ),
     )

     cost_1 = completion_cost(model=model, completion_response=response_1)

+    _model_info = litellm.get_model_info(
+        model="claude-3-5-sonnet-20240620", custom_llm_provider="anthropic"
+    )
+    expected_cost = (
+        (
+            response_1.usage.prompt_tokens
+            - response_1.usage.prompt_tokens_details.cached_tokens
+        )
+        * _model_info["input_cost_per_token"]
+        + response_1.usage.prompt_tokens_details.cached_tokens
+        * _model_info["cache_read_input_token_cost"]
+        + response_1.usage.cache_creation_input_tokens
+        * _model_info["cache_creation_input_token_cost"]
+        + response_1.usage.completion_tokens * _model_info["output_cost_per_token"]
+    )  # Cost of processing (non-cache hit + cache hit) + Cost of cache-writing
+
+    assert round(expected_cost, 5) == round(cost_1, 5)
+
+    print(f"expected_cost: {expected_cost}, cost_1: {cost_1}")
+
     ## READ FROM CACHE ## (LESS EXPENSIVE)
     response_2 = ModelResponse(
         id="chatcmpl-3f427194-0840-4d08-b571-56bfe38a5424",
@@ -1216,14 +1239,14 @@ def test_completion_cost_anthropic_prompt_caching():
         system_fingerprint=None,
         usage=Usage(
             completion_tokens=10,
-            prompt_tokens=14,
-            total_tokens=24,
+            prompt_tokens=114,
+            total_tokens=134,
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=100),
             cache_creation_input_tokens=0,
             cache_read_input_tokens=100,
         ),
     )

     cost_1 = completion_cost(model=model, completion_response=response_1)
     cost_2 = completion_cost(model=model, completion_response=response_2)

     assert cost_1 > cost_2
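Repeating the worked example for response_2's usage (prompt_tokens=114, cached_tokens=100, cache_creation_input_tokens=0, completion_tokens=10), with the same assumed rates as in the sketch above, shows why the final assertion holds: the 100 cache-hit tokens are billed at the cheaper cache-read rate, so the second call costs less than the cache-writing first call:

```python
# Same assumed rates as the previous sketch; only the usage numbers change.
expected_cost_2 = (
    (114 - 100) * 3e-06  # non-cached prompt tokens
    + 100 * 3e-07        # cache-hit prompt tokens at the cache-read rate
    + 0 * 3.75e-06       # no cache writes on this call
    + 10 * 1.5e-05       # completion tokens
)
print(expected_cost_2)  # ~0.000222, well below the ~0.000867 of the first call
```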
@@ -10,12 +10,40 @@ import litellm
 import pytest


+def _usage_format_tests(usage: litellm.Usage):
+    """
+    OpenAI prompt caching
+    - prompt_tokens = sum of non-cache hit tokens + cache-hit tokens
+    - total_tokens = prompt_tokens + completion_tokens
+
+    Example
+    ```
+    "usage": {
+        "prompt_tokens": 2006,
+        "completion_tokens": 300,
+        "total_tokens": 2306,
+        "prompt_tokens_details": {
+            "cached_tokens": 1920
+        },
+        "completion_tokens_details": {
+            "reasoning_tokens": 0
+        }
+        # ANTHROPIC_ONLY #
+        "cache_creation_input_tokens": 0
+    }
+    ```
+    """
+    assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens
+
+    assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens
+
+
 @pytest.mark.parametrize(
     "model",
     [
         "anthropic/claude-3-5-sonnet-20240620",
-        "openai/gpt-4o",
-        "deepseek/deepseek-chat",
+        # "openai/gpt-4o",
+        # "deepseek/deepseek-chat",
     ],
 )
 def test_prompt_caching_model(model):
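The new helper's docstring describes the OpenAI-style accounting: cached tokens are a subset of prompt_tokens rather than an extra charge on top, and total_tokens is simply prompt plus completion tokens. Plugging the docstring's own example numbers into the helper's two assertions confirms both invariants:

```python
# Numbers taken from the example usage block in the docstring above.
prompt_tokens, completion_tokens, total_tokens, cached_tokens = 2006, 300, 2306, 1920

assert total_tokens == prompt_tokens + completion_tokens  # 2306 == 2006 + 300
assert prompt_tokens > cached_tokens                      # 2006 > 1920
```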
@@ -66,9 +94,13 @@ def test_prompt_caching_model(model):
         max_tokens=10,
     )

+    _usage_format_tests(response.usage)
+
     print("response=", response)
     print("response.usage=", response.usage)

+    _usage_format_tests(response.usage)
+
     assert "prompt_tokens_details" in response.usage
     assert response.usage.prompt_tokens_details.cached_tokens > 0