Add pyright to ci/cd + Fix remaining type-checking errors (#6082)

* fix: fix type-checking errors

* fix: fix additional type-checking errors

* fix: additional type-checking error fixes

* fix: fix additional type-checking errors

* fix: additional type-check fixes

* fix: fix all type-checking errors + add pyright to ci/cd

* fix: fix incorrect import

* ci(config.yml): use mypy on ci/cd

* fix: fix type-checking errors in utils.py

* fix: fix all type-checking errors on main.py

* fix: fix mypy linting errors

* fix(anthropic/cost_calculator.py): fix linting errors

* fix: fix mypy linting errors

* fix: fix linting errors
This commit is contained in:
Krish Dholakia 2024-10-05 17:04:00 -04:00 committed by GitHub
parent f7ce1173f3
commit fac3b2ee42
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
65 changed files with 619 additions and 522 deletions

View file

@@ -14,7 +14,7 @@ from typing import Optional
import httpx
from litellm.integrations.SlackAlerting.types import AlertType
from litellm.types.integrations.slack_alerting import AlertType
# import logging
# logging.basicConfig(level=logging.DEBUG)

View file

@@ -1188,13 +1188,36 @@ def test_completion_cost_anthropic_prompt_caching():
system_fingerprint=None,
usage=Usage(
completion_tokens=10,
prompt_tokens=14,
total_tokens=24,
prompt_tokens=114,
total_tokens=124,
prompt_tokens_details=PromptTokensDetails(cached_tokens=0),
cache_creation_input_tokens=100,
cache_read_input_tokens=0,
),
)
cost_1 = completion_cost(model=model, completion_response=response_1)
_model_info = litellm.get_model_info(
model="claude-3-5-sonnet-20240620", custom_llm_provider="anthropic"
)
expected_cost = (
(
response_1.usage.prompt_tokens
- response_1.usage.prompt_tokens_details.cached_tokens
)
* _model_info["input_cost_per_token"]
+ response_1.usage.prompt_tokens_details.cached_tokens
* _model_info["cache_read_input_token_cost"]
+ response_1.usage.cache_creation_input_tokens
* _model_info["cache_creation_input_token_cost"]
+ response_1.usage.completion_tokens * _model_info["output_cost_per_token"]
) # Cost of processing (non-cache hit + cache hit) + Cost of cache-writing (cache writing)
assert round(expected_cost, 5) == round(cost_1, 5)
print(f"expected_cost: {expected_cost}, cost_1: {cost_1}")
## READ FROM CACHE ## (LESS EXPENSIVE)
response_2 = ModelResponse(
id="chatcmpl-3f427194-0840-4d08-b571-56bfe38a5424",
@@ -1216,14 +1239,14 @@ def test_completion_cost_anthropic_prompt_caching():
system_fingerprint=None,
usage=Usage(
completion_tokens=10,
prompt_tokens=14,
total_tokens=24,
prompt_tokens=114,
total_tokens=134,
prompt_tokens_details=PromptTokensDetails(cached_tokens=100),
cache_creation_input_tokens=0,
cache_read_input_tokens=100,
),
)
cost_1 = completion_cost(model=model, completion_response=response_1)
cost_2 = completion_cost(model=model, completion_response=response_2)
assert cost_1 > cost_2

View file

@@ -10,12 +10,40 @@ import litellm
import pytest
def _usage_format_tests(usage: litellm.Usage):
    """
    Validate that a usage object conforms to the OpenAI prompt-caching format.

    Invariants checked:
    - prompt_tokens = sum of non-cache-hit tokens + cache-hit tokens
    - total_tokens = prompt_tokens + completion_tokens
    - cached_tokens is a strict subset of prompt_tokens

    Example
    ```
    "usage": {
        "prompt_tokens": 2006,
        "completion_tokens": 300,
        "total_tokens": 2306,
        "prompt_tokens_details": {
            "cached_tokens": 1920
        },
        "completion_tokens_details": {
            "reasoning_tokens": 0
        }
        # ANTHROPIC_ONLY #
        "cache_creation_input_tokens": 0
    }
    ```
    """
    expected_total = usage.prompt_tokens + usage.completion_tokens
    assert usage.total_tokens == expected_total
    assert usage.prompt_tokens_details.cached_tokens < usage.prompt_tokens
@pytest.mark.parametrize(
"model",
[
"anthropic/claude-3-5-sonnet-20240620",
"openai/gpt-4o",
"deepseek/deepseek-chat",
# "openai/gpt-4o",
# "deepseek/deepseek-chat",
],
)
def test_prompt_caching_model(model):
@@ -66,9 +94,13 @@ def test_prompt_caching_model(model):
max_tokens=10,
)
_usage_format_tests(response.usage)
print("response=", response)
print("response.usage=", response.usage)
_usage_format_tests(response.usage)
assert "prompt_tokens_details" in response.usage
assert response.usage.prompt_tokens_details.cached_tokens > 0