forked from phoenix/litellm-mirror
Add pyright to ci/cd + Fix remaining type-checking errors (#6082)
* fix: fix type-checking errors
* fix: fix additional type-checking errors
* fix: additional type-checking error fixes
* fix: fix additional type-checking errors
* fix: additional type-check fixes
* fix: fix all type-checking errors + add pyright to ci/cd
* fix: fix incorrect import
* ci(config.yml): use mypy on ci/cd
* fix: fix type-checking errors in utils.py
* fix: fix all type-checking errors on main.py
* fix: fix mypy linting errors
* fix(anthropic/cost_calculator.py): fix linting errors
* fix: fix mypy linting errors
* fix: fix linting errors
This commit is contained in:
parent f7ce1173f3
commit fac3b2ee42

65 changed files with 619 additions and 522 deletions
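The squashed messages above are individual passes over pyright and mypy findings. As a rough, hypothetical sketch of the kind of change such fixes usually involve (the function and names below are illustrative and do not appear in this diff), a typical implicit-Optional repair looks like this:

```python
from typing import Optional


# Hypothetical example: pyright rejects `alert_type: str = None` because the
# None default makes the parameter implicitly Optional. Spelling out Optional
# and narrowing it before use satisfies the checker without changing behavior.
def format_alert(alert_type: Optional[str] = None) -> str:
    if alert_type is None:
        return "alert: unspecified"
    return f"alert: {alert_type}"
```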
@@ -14,7 +14,7 @@ from typing import Optional

 import httpx

-from litellm.integrations.SlackAlerting.types import AlertType
+from litellm.types.integrations.slack_alerting import AlertType

 # import logging
 # logging.basicConfig(level=logging.DEBUG)
@@ -1188,13 +1188,36 @@ def test_completion_cost_anthropic_prompt_caching():
         system_fingerprint=None,
         usage=Usage(
             completion_tokens=10,
-            prompt_tokens=14,
-            total_tokens=24,
+            prompt_tokens=114,
+            total_tokens=124,
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=0),
             cache_creation_input_tokens=100,
             cache_read_input_tokens=0,
         ),
     )

     cost_1 = completion_cost(model=model, completion_response=response_1)

+    _model_info = litellm.get_model_info(
+        model="claude-3-5-sonnet-20240620", custom_llm_provider="anthropic"
+    )
+    expected_cost = (
+        (
+            response_1.usage.prompt_tokens
+            - response_1.usage.prompt_tokens_details.cached_tokens
+        )
+        * _model_info["input_cost_per_token"]
+        + response_1.usage.prompt_tokens_details.cached_tokens
+        * _model_info["cache_read_input_token_cost"]
+        + response_1.usage.cache_creation_input_tokens
+        * _model_info["cache_creation_input_token_cost"]
+        + response_1.usage.completion_tokens * _model_info["output_cost_per_token"]
+    )  # Cost of processing (non-cache hit + cache hit) + Cost of cache-writing
+
+    assert round(expected_cost, 5) == round(cost_1, 5)
+
+    print(f"expected_cost: {expected_cost}, cost_1: {cost_1}")
+
     ## READ FROM CACHE ## (LESS EXPENSIVE)
     response_2 = ModelResponse(
         id="chatcmpl-3f427194-0840-4d08-b571-56bfe38a5424",
@@ -1216,14 +1239,14 @@ def test_completion_cost_anthropic_prompt_caching():
         system_fingerprint=None,
         usage=Usage(
             completion_tokens=10,
-            prompt_tokens=14,
-            total_tokens=24,
+            prompt_tokens=114,
+            total_tokens=134,
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=100),
             cache_creation_input_tokens=0,
             cache_read_input_tokens=100,
         ),
     )

     cost_1 = completion_cost(model=model, completion_response=response_1)
     cost_2 = completion_cost(model=model, completion_response=response_2)

     assert cost_1 > cost_2
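Repeating the worked example for response_2's usage (prompt_tokens=114, cached_tokens=100, cache_creation_input_tokens=0, completion_tokens=10), with the same assumed rates as in the sketch above, shows why the final assertion holds: the 100 cache-hit tokens are billed at the cheaper cache-read rate, so the second call costs less than the cache-writing first call:

```python
# Same assumed rates as the previous sketch; only the usage numbers change.
expected_cost_2 = (
    (114 - 100) * 3e-06  # non-cached prompt tokens
    + 100 * 3e-07        # cache-hit prompt tokens at the cache-read rate
    + 0 * 3.75e-06       # no cache writes on this call
    + 10 * 1.5e-05       # completion tokens
)
print(expected_cost_2)  # ~0.000222, well below the ~0.000867 of the first call
```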
@@ -10,12 +10,40 @@ import litellm
 import pytest


+def _usage_format_tests(usage: litellm.Usage):
+    """
+    OpenAI prompt caching
+    - prompt_tokens = sum of non-cache hit tokens + cache-hit tokens
+    - total_tokens = prompt_tokens + completion_tokens
+
+    Example
+    ```
+    "usage": {
+        "prompt_tokens": 2006,
+        "completion_tokens": 300,
+        "total_tokens": 2306,
+        "prompt_tokens_details": {
+            "cached_tokens": 1920
+        },
+        "completion_tokens_details": {
+            "reasoning_tokens": 0
+        }
+        # ANTHROPIC_ONLY #
+        "cache_creation_input_tokens": 0
+    }
+    ```
+    """
+    assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens
+
+    assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens
+
+
 @pytest.mark.parametrize(
     "model",
     [
         "anthropic/claude-3-5-sonnet-20240620",
-        "openai/gpt-4o",
-        "deepseek/deepseek-chat",
+        # "openai/gpt-4o",
+        # "deepseek/deepseek-chat",
     ],
 )
 def test_prompt_caching_model(model):
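The new helper's docstring describes the OpenAI-style accounting: cached tokens are a subset of prompt_tokens rather than an extra charge on top, and total_tokens is simply prompt plus completion tokens. Plugging the docstring's own example numbers into the helper's two assertions confirms both invariants:

```python
# Numbers taken from the example usage block in the docstring above.
prompt_tokens, completion_tokens, total_tokens, cached_tokens = 2006, 300, 2306, 1920

assert total_tokens == prompt_tokens + completion_tokens  # 2306 == 2006 + 300
assert prompt_tokens > cached_tokens                      # 2006 > 1920
```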
@@ -66,9 +94,13 @@ def test_prompt_caching_model(model):
         max_tokens=10,
     )

+    _usage_format_tests(response.usage)
+
     print("response=", response)
     print("response.usage=", response.usage)

+    _usage_format_tests(response.usage)
+
     assert "prompt_tokens_details" in response.usage
     assert response.usage.prompt_tokens_details.cached_tokens > 0