From b17437e7b19e2387b75b9caecf698083c84fa85e Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 14 Aug 2024 16:28:12 -0700
Subject: [PATCH] move claude prompt caching to diff file

---
 .../tests/test_anthropic_prompt_caching.py    | 222 ++++++++++++++++++
 litellm/tests/test_completion.py              | 179 --------------
 2 files changed, 222 insertions(+), 179 deletions(-)
 create mode 100644 litellm/tests/test_anthropic_prompt_caching.py

diff --git a/litellm/tests/test_anthropic_prompt_caching.py b/litellm/tests/test_anthropic_prompt_caching.py
new file mode 100644
index 0000000000..8f57e96065
--- /dev/null
+++ b/litellm/tests/test_anthropic_prompt_caching.py
@@ -0,0 +1,222 @@
+import json
+import os
+import sys
+import traceback
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import io
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+import os
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+import litellm
+from litellm import RateLimitError, Timeout, completion, completion_cost, embedding
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.prompt_templates.factory import anthropic_messages_pt
+
+# litellm.num_retries =3
+litellm.cache = None
+litellm.success_callback = []
+user_message = "Write a short poem about the sky"
+messages = [{"content": user_message, "role": "user"}]
+
+
+def logger_fn(user_model_dict):
+    print(f"user_model_dict: {user_model_dict}")
+
+
+@pytest.fixture(autouse=True)
+def reset_callbacks():
+    print("\npytest fixture - resetting callbacks")
+    litellm.success_callback = []
+    litellm._async_success_callback = []
+    litellm.failure_callback = []
+    litellm.callbacks = []
+
+
+@pytest.mark.asyncio
+async def test_litellm_anthropic_prompt_caching_tools():
+    # Arrange: Set up the MagicMock for the httpx.AsyncClient
+    mock_response = AsyncMock()
+
+    def return_val():
+        return {
+            "id": "msg_01XFDUDYJgAACzvnptvVoYEL",
+            "type": "message",
+            "role": "assistant",
+            "content": [{"type": "text", "text": "Hello!"}],
+            "model": "claude-3-5-sonnet-20240620",
+            "stop_reason": "end_turn",
+            "stop_sequence": None,
+            "usage": {"input_tokens": 12, "output_tokens": 6},
+        }
+
+    mock_response.json = return_val
+
+    litellm.set_verbose = True
+    with patch(
+        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+        return_value=mock_response,
+    ) as mock_post:
+        # Act: Call the litellm.acompletion function
+        response = await litellm.acompletion(
+            api_key="mock_api_key",
+            model="anthropic/claude-3-5-sonnet-20240620",
+            messages=[
+                {"role": "user", "content": "What's the weather like in Boston today?"}
+            ],
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "get_current_weather",
+                        "description": "Get the current weather in a given location",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "location": {
+                                    "type": "string",
+                                    "description": "The city and state, e.g. San Francisco, CA",
+                                },
+                                "unit": {
+                                    "type": "string",
+                                    "enum": ["celsius", "fahrenheit"],
+                                },
+                            },
+                            "required": ["location"],
+                        },
+                        "cache_control": {"type": "ephemeral"},
+                    },
+                }
+            ],
+            extra_headers={
+                "anthropic-version": "2023-06-01",
+                "anthropic-beta": "prompt-caching-2024-07-31",
+            },
+        )
+
+        # Print what was called on the mock
+        print("call args=", mock_post.call_args)
+
+        expected_url = "https://api.anthropic.com/v1/messages"
+        expected_headers = {
+            "accept": "application/json",
+            "content-type": "application/json",
+            "anthropic-version": "2023-06-01",
+            "anthropic-beta": "prompt-caching-2024-07-31",
+            "x-api-key": "mock_api_key",
+        }
+
+        expected_json = {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "What's the weather like in Boston today?",
+                        }
+                    ],
+                }
+            ],
+            "tools": [
+                {
+                    "name": "get_current_weather",
+                    "description": "Get the current weather in a given location",
+                    "cache_control": {"type": "ephemeral"},
+                    "input_schema": {
+                        "type": "object",
+                        "properties": {
+                            "location": {
+                                "type": "string",
+                                "description": "The city and state, e.g. San Francisco, CA",
+                            },
+                            "unit": {
+                                "type": "string",
+                                "enum": ["celsius", "fahrenheit"],
+                            },
+                        },
+                        "required": ["location"],
+                    },
+                }
+            ],
+            "max_tokens": 4096,
+            "model": "claude-3-5-sonnet-20240620",
+        }
+
+        mock_post.assert_called_once_with(
+            expected_url, json=expected_json, headers=expected_headers, timeout=600.0
+        )
+
+
+@pytest.mark.asyncio()
+async def test_anthropic_api_prompt_caching_basic():
+    litellm.set_verbose = True
+    response = await litellm.acompletion(
+        model="anthropic/claude-3-5-sonnet-20240620",
+        messages=[
+            # System Message
+            {
+                "role": "system",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "Here is the full text of a complex legal agreement"
+                        * 400,
+                        "cache_control": {"type": "ephemeral"},
+                    }
+                ],
+            },
+            # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "What are the key terms and conditions in this agreement?",
+                        "cache_control": {"type": "ephemeral"},
+                    }
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+            },
+            # The final turn is marked with cache-control, for continuing in followups.
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "What are the key terms and conditions in this agreement?",
+                        "cache_control": {"type": "ephemeral"},
+                    }
+                ],
+            },
+        ],
+        temperature=0.2,
+        max_tokens=10,
+        extra_headers={
+            "anthropic-version": "2023-06-01",
+            "anthropic-beta": "prompt-caching-2024-07-31",
+        },
+    )
+
+    print("response=", response)
+
+    assert "cache_read_input_tokens" in response.usage
+    assert "cache_creation_input_tokens" in response.usage
+
+    # Assert either a cache entry was created or cache was read - changes depending on the anthropic api ttl
+    assert (response.usage.cache_read_input_tokens > 0) or (
+        response.usage.cache_creation_input_tokens > 0
+    )
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 7f73d62945..b945d3d1e2 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -3449,185 +3449,6 @@ def response_format_tests(response: litellm.ModelResponse):
     assert isinstance(response.usage.total_tokens, int)  # type: ignore
 
 
-@pytest.mark.asyncio()
-async def test_anthropic_api_prompt_caching_basic():
-    litellm.set_verbose = True
-    response = await litellm.acompletion(
-        model="anthropic/claude-3-5-sonnet-20240620",
-        messages=[
-            # System Message
-            {
-                "role": "system",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": "Here is the full text of a complex legal agreement"
-                        * 400,
-                        "cache_control": {"type": "ephemeral"},
-                    }
-                ],
-            },
-            # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": "What are the key terms and conditions in this agreement?",
-                        "cache_control": {"type": "ephemeral"},
-                    }
-                ],
-            },
-            {
-                "role": "assistant",
-                "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
-            },
-            # The final turn is marked with cache-control, for continuing in followups.
- { - "role": "user", - "content": [ - { - "type": "text", - "text": "What are the key terms and conditions in this agreement?", - "cache_control": {"type": "ephemeral"}, - } - ], - }, - ], - temperature=0.2, - max_tokens=10, - extra_headers={ - "anthropic-version": "2023-06-01", - "anthropic-beta": "prompt-caching-2024-07-31", - }, - ) - - print("response=", response) - - assert "cache_read_input_tokens" in response.usage - assert "cache_creation_input_tokens" in response.usage - - # Assert either a cache entry was created or cache was read - changes depending on the anthropic api ttl - assert (response.usage.cache_read_input_tokens > 0) or ( - response.usage.cache_creation_input_tokens > 0 - ) - - -@pytest.mark.asyncio -async def test_litellm_acompletion_httpx_call(): - # Arrange: Set up the MagicMock for the httpx.AsyncClient - mock_response = AsyncMock() - - def return_val(): - return { - "id": "msg_01XFDUDYJgAACzvnptvVoYEL", - "type": "message", - "role": "assistant", - "content": [{"type": "text", "text": "Hello!"}], - "model": "claude-3-5-sonnet-20240620", - "stop_reason": "end_turn", - "stop_sequence": None, - "usage": {"input_tokens": 12, "output_tokens": 6}, - } - - mock_response.json = return_val - - litellm.set_verbose = True - with patch( - "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", - return_value=mock_response, - ) as mock_post: - # Act: Call the litellm.acompletion function - response = await litellm.acompletion( - api_key="mock_api_key", - model="anthropic/claude-3-5-sonnet-20240620", - messages=[ - {"role": "user", "content": "What's the weather like in Boston today?"} - ], - tools=[ - { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "unit": { - "type": "string", - "enum": ["celsius", "fahrenheit"], - }, - }, - "required": ["location"], - }, - "cache_control": {"type": "ephemeral"}, - }, - } - ], - extra_headers={ - "anthropic-version": "2023-06-01", - "anthropic-beta": "prompt-caching-2024-07-31", - }, - ) - - # Print what was called on the mock - print("call args=", mock_post.call_args) - - expected_url = "https://api.anthropic.com/v1/messages" - expected_headers = { - "accept": "application/json", - "content-type": "application/json", - "anthropic-version": "2023-06-01", - "anthropic-beta": "prompt-caching-2024-07-31", - "x-api-key": "mock_api_key", - } - - expected_json = { - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What's the weather like in Boston today?", - } - ], - } - ], - "tools": [ - { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "cache_control": {"type": "ephemeral"}, - "input_schema": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "unit": { - "type": "string", - "enum": ["celsius", "fahrenheit"], - }, - }, - "required": ["location"], - }, - } - ], - "max_tokens": 4096, - "model": "claude-3-5-sonnet-20240620", - } - - mock_post.assert_called_once_with( - expected_url, json=expected_json, headers=expected_headers, timeout=600.0 - ) - - @pytest.mark.parametrize( "model", [