From 6cb3675a0632033ae8f7a26fb0328c77a74cac77 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 15 Aug 2024 20:12:11 -0700
Subject: [PATCH] fix using prompt caching on proxy

---
 litellm/proxy/litellm_pre_call_utils.py      | 32 +++++++++++++++-
 .../tests/test_anthropic_context_caching.py  | 37 +++++++++++++++++++
 2 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 litellm/proxy/tests/test_anthropic_context_caching.py

diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 9b896f66c2..dd39efd6b7 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -126,14 +126,19 @@ async def add_litellm_data_to_request(
 
     safe_add_api_version_from_query_params(data, request)
 
+    _headers = dict(request.headers)
+
     # Include original request and headers in the data
     data["proxy_server_request"] = {
         "url": str(request.url),
         "method": request.method,
-        "headers": dict(request.headers),
+        "headers": _headers,
         "body": copy.copy(data),  # use copy instead of deepcopy
     }
 
+    ## Forward any LLM API Provider specific headers in extra_headers
+    add_provider_specific_headers_to_request(data=data, headers=_headers)
+
     ## Cache Controls
     headers = request.headers
     verbose_proxy_logger.debug("Request Headers: %s", headers)
@@ -306,6 +311,31 @@ async def add_litellm_data_to_request(
     return data
 
 
+def add_provider_specific_headers_to_request(
+    data: dict,
+    headers: dict,
+):
+    ANTHROPIC_API_HEADERS = [
+        "anthropic-version",
+        "anthropic-beta",
+    ]
+
+    extra_headers = data.get("extra_headers", {}) or {}
+
+    # boolean to indicate if a header was added
+    added_header = False
+    for header in ANTHROPIC_API_HEADERS:
+        if header in headers:
+            header_value = headers[header]
+            extra_headers.update({header: header_value})
+            added_header = True
+
+    if added_header is True:
+        data["extra_headers"] = extra_headers
+
+    return
+
+
 def _add_otel_traceparent_to_data(data: dict, request: Request):
     from litellm.proxy.proxy_server import open_telemetry_logger
 
diff --git a/litellm/proxy/tests/test_anthropic_context_caching.py b/litellm/proxy/tests/test_anthropic_context_caching.py
new file mode 100644
index 0000000000..6156e4a048
--- /dev/null
+++ b/litellm/proxy/tests/test_anthropic_context_caching.py
@@ -0,0 +1,37 @@
+import openai
+
+client = openai.OpenAI(
+    api_key="sk-1234",  # litellm proxy api key
+    base_url="http://0.0.0.0:4000",  # litellm proxy base url
+)
+
+
+response = client.chat.completions.create(
+    model="anthropic/claude-3-5-sonnet-20240620",
+    messages=[
+        {  # type: ignore
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant tasked with analyzing legal documents.",
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the full text of a complex legal agreement" * 100,
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": "what are the key terms and conditions in this agreement?",
+        },
+    ],
+    extra_headers={
+        "anthropic-version": "2023-06-01",
+        "anthropic-beta": "prompt-caching-2024-07-31",
+    },
+)
+
+print(response)