litellm-mirror/tests/local_testing/test_prompt_caching.py
Krish Dholakia fac3b2ee42
Add pyright to ci/cd + Fix remaining type-checking errors (#6082)
* fix: fix type-checking errors

* fix: fix additional type-checking errors

* fix: additional type-checking error fixes

* fix: fix additional type-checking errors

* fix: additional type-check fixes

* fix: fix all type-checking errors + add pyright to ci/cd

* fix: fix incorrect import

* ci(config.yml): use mypy on ci/cd

* fix: fix type-checking errors in utils.py

* fix: fix all type-checking errors on main.py

* fix: fix mypy linting errors

* fix(anthropic/cost_calculator.py): fix linting errors

* fix: fix mypy linting errors

* fix: fix linting errors
2024-10-05 17:04:00 -04:00

113 lines
3.6 KiB
Python

"""Asserts that prompt caching information is correctly returned for Anthropic, OpenAI, and Deepseek"""
import io
import os
import sys
sys.path.insert(0, os.path.abspath("../.."))
import litellm
import pytest
def _usage_format_tests(usage: litellm.Usage):
"""
OpenAI prompt caching
- prompt_tokens = sum of non-cache hit tokens + cache-hit tokens
- total_tokens = prompt_tokens + completion_tokens
Example
```
"usage": {
"prompt_tokens": 2006,
"completion_tokens": 300,
"total_tokens": 2306,
"prompt_tokens_details": {
"cached_tokens": 1920
},
"completion_tokens_details": {
"reasoning_tokens": 0
}
# ANTHROPIC_ONLY #
"cache_creation_input_tokens": 0
}
```
"""
assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens
assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens
@pytest.mark.parametrize(
"model",
[
"anthropic/claude-3-5-sonnet-20240620",
# "openai/gpt-4o",
# "deepseek/deepseek-chat",
],
)
def test_prompt_caching_model(model):
for _ in range(2):
response = litellm.completion(
model=model,
messages=[
# System Message
{
"role": "system",
"content": [
{
"type": "text",
"text": "Here is the full text of a complex legal agreement"
* 400,
"cache_control": {"type": "ephemeral"},
}
],
},
# marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
{
"role": "user",
"content": [
{
"type": "text",
"text": "What are the key terms and conditions in this agreement?",
"cache_control": {"type": "ephemeral"},
}
],
},
{
"role": "assistant",
"content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
},
# The final turn is marked with cache-control, for continuing in followups.
{
"role": "user",
"content": [
{
"type": "text",
"text": "What are the key terms and conditions in this agreement?",
"cache_control": {"type": "ephemeral"},
}
],
},
],
temperature=0.2,
max_tokens=10,
)
_usage_format_tests(response.usage)
print("response=", response)
print("response.usage=", response.usage)
_usage_format_tests(response.usage)
assert "prompt_tokens_details" in response.usage
assert response.usage.prompt_tokens_details.cached_tokens > 0
# assert "cache_read_input_tokens" in response.usage
# assert "cache_creation_input_tokens" in response.usage
# # Assert either a cache entry was created or cache was read - changes depending on the anthropic api ttl
# assert (response.usage.cache_read_input_tokens > 0) or (
# response.usage.cache_creation_input_tokens > 0
# )