From 6cb3675a0632033ae8f7a26fb0328c77a74cac77 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 15 Aug 2024 20:12:11 -0700
Subject: [PATCH] fix using prompt caching on proxy

---
 litellm/proxy/litellm_pre_call_utils.py      | 32 +++++++++++++++-
 .../tests/test_anthropic_context_caching.py  | 37 +++++++++++++++++++
 2 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 litellm/proxy/tests/test_anthropic_context_caching.py

diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 9b896f66c2..dd39efd6b7 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -126,14 +126,19 @@ async def add_litellm_data_to_request(
 
     safe_add_api_version_from_query_params(data, request)
 
+    _headers = dict(request.headers)
+
     # Include original request and headers in the data
     data["proxy_server_request"] = {
         "url": str(request.url),
         "method": request.method,
-        "headers": dict(request.headers),
+        "headers": _headers,
         "body": copy.copy(data),  # use copy instead of deepcopy
     }
 
+    ## Forward any LLM API Provider specific headers in extra_headers
+    add_provider_specific_headers_to_request(data=data, headers=_headers)
+
     ## Cache Controls
     headers = request.headers
     verbose_proxy_logger.debug("Request Headers: %s", headers)
@@ -306,6 +311,31 @@ async def add_litellm_data_to_request(
     return data
 
 
+def add_provider_specific_headers_to_request(
+    data: dict,
+    headers: dict,
+):
+    ANTHROPIC_API_HEADERS = [
+        "anthropic-version",
+        "anthropic-beta",
+    ]
+
+    extra_headers = data.get("extra_headers", {}) or {}
+
+    # boolean to indicate if a header was added
+    added_header = False
+    for header in ANTHROPIC_API_HEADERS:
+        if header in headers:
+            header_value = headers[header]
+            extra_headers.update({header: header_value})
+            added_header = True
+
+    if added_header is True:
+        data["extra_headers"] = extra_headers
+
+    return
+
+
 def _add_otel_traceparent_to_data(data: dict, request: Request):
     from litellm.proxy.proxy_server import open_telemetry_logger
 
diff --git a/litellm/proxy/tests/test_anthropic_context_caching.py b/litellm/proxy/tests/test_anthropic_context_caching.py
new file mode 100644
index 0000000000..6156e4a048
--- /dev/null
+++ b/litellm/proxy/tests/test_anthropic_context_caching.py
@@ -0,0 +1,37 @@
+import openai
+
+client = openai.OpenAI(
+    api_key="sk-1234",  # litellm proxy api key
+    base_url="http://0.0.0.0:4000",  # litellm proxy base url
+)
+
+
+response = client.chat.completions.create(
+    model="anthropic/claude-3-5-sonnet-20240620",
+    messages=[
+        {  # type: ignore
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant tasked with analyzing legal documents.",
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the full text of a complex legal agreement" * 100,
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": "what are the key terms and conditions in this agreement?",
+        },
+    ],
+    extra_headers={
+        "anthropic-version": "2023-06-01",
+        "anthropic-beta": "prompt-caching-2024-07-31",
+    },
+)
+
+print(response)