fix using prompt caching on proxy

Ishaan Jaff 2024-08-15 20:12:11 -07:00
parent 42c2290a77
commit 6cb3675a06
2 changed files with 68 additions and 1 deletion


@@ -126,14 +126,19 @@ async def add_litellm_data_to_request(
     safe_add_api_version_from_query_params(data, request)
 
+    _headers = dict(request.headers)
+
     # Include original request and headers in the data
     data["proxy_server_request"] = {
         "url": str(request.url),
         "method": request.method,
-        "headers": dict(request.headers),
+        "headers": _headers,
         "body": copy.copy(data),  # use copy instead of deepcopy
     }
 
+    ## Forward any LLM API Provider specific headers in extra_headers
+    add_provider_specific_headers_to_request(data=data, headers=_headers)
+
     ## Cache Controls
     headers = request.headers
     verbose_proxy_logger.debug("Request Headers: %s", headers)
@@ -306,6 +311,31 @@ async def add_litellm_data_to_request(
     return data
 
 
+def add_provider_specific_headers_to_request(
+    data: dict,
+    headers: dict,
+):
+    ANTHROPIC_API_HEADERS = [
+        "anthropic-version",
+        "anthropic-beta",
+    ]
+
+    extra_headers = data.get("extra_headers", {}) or {}
+
+    # boolean to indicate if a header was added
+    added_header = False
+    for header in ANTHROPIC_API_HEADERS:
+        if header in headers:
+            header_value = headers[header]
+            extra_headers.update({header: header_value})
+            added_header = True
+
+    if added_header is True:
+        data["extra_headers"] = extra_headers
+
+    return
+
+
 def _add_otel_traceparent_to_data(data: dict, request: Request):
     from litellm.proxy.proxy_server import open_telemetry_logger
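
For illustration, here is a minimal standalone sketch of the helper's behavior (a hypothetical reproduction for demonstration, not additional code from the commit; the "x-team" header is an invented example): recognized Anthropic headers from the incoming request are merged into any existing extra_headers, while unrelated headers such as authorization are not forwarded.

# Hypothetical standalone reproduction of the helper above, for demonstration.
def add_provider_specific_headers_to_request(data: dict, headers: dict):
    ANTHROPIC_API_HEADERS = ["anthropic-version", "anthropic-beta"]
    extra_headers = data.get("extra_headers", {}) or {}
    added_header = False
    for header in ANTHROPIC_API_HEADERS:
        if header in headers:
            extra_headers.update({header: headers[header]})
            added_header = True
    if added_header:
        data["extra_headers"] = extra_headers

data = {"model": "claude-3-5-sonnet-20240620", "extra_headers": {"x-team": "legal"}}
incoming = {
    "authorization": "Bearer sk-1234",
    "anthropic-beta": "prompt-caching-2024-07-31",
}
add_provider_specific_headers_to_request(data=data, headers=incoming)
# Only the recognized Anthropic header is merged in; authorization is not.
assert data["extra_headers"] == {
    "x-team": "legal",
    "anthropic-beta": "prompt-caching-2024-07-31",
}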


@@ -0,0 +1,37 @@
+import openai
+
+client = openai.OpenAI(
+    api_key="sk-1234",  # litellm proxy api key
+    base_url="http://0.0.0.0:4000",  # litellm proxy base url
+)
+
+response = client.chat.completions.create(
+    model="anthropic/claude-3-5-sonnet-20240620",
+    messages=[
+        {  # type: ignore
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant tasked with analyzing legal documents.",
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the full text of a complex legal agreement" * 100,
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": "what are the key terms and conditions in this agreement?",
+        },
+    ],
+    extra_headers={
+        "anthropic-version": "2023-06-01",
+        "anthropic-beta": "prompt-caching-2024-07-31",
+    },
+)
+
+print(response)
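
A quick way to sanity-check the cache (a sketch under assumptions: the client and payload from the example above are already defined, and the proxy surfaces the provider's usage accounting on response.usage) is to send the identical request twice and compare latency and usage:

import time

def send_request():
    # Identical payload to the example above; Anthropic caches the large
    # system block marked with cache_control on the first call.
    return client.chat.completions.create(
        model="anthropic/claude-3-5-sonnet-20240620",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "You are an AI assistant tasked with analyzing legal documents.",
                    },
                    {
                        "type": "text",
                        "text": "Here is the full text of a complex legal agreement" * 100,
                        "cache_control": {"type": "ephemeral"},
                    },
                ],
            },
            {
                "role": "user",
                "content": "what are the key terms and conditions in this agreement?",
            },
        ],
        extra_headers={
            "anthropic-version": "2023-06-01",
            "anthropic-beta": "prompt-caching-2024-07-31",
        },
    )

for attempt in (1, 2):
    start = time.time()
    resp = send_request()
    # The second call should be noticeably faster if the cached prefix was
    # reused; whether cache read counts appear in usage depends on how the
    # proxy maps Anthropic's usage fields in your litellm version.
    print(f"attempt {attempt}: {time.time() - start:.2f}s, usage={resp.usage}")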