mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)

fix using prompt caching on proxy

This commit is contained in:
parent 42c2290a77
commit 6cb3675a06

2 changed files with 68 additions and 1 deletion
@@ -126,14 +126,19 @@ async def add_litellm_data_to_request(

     safe_add_api_version_from_query_params(data, request)

+    _headers = dict(request.headers)
+
     # Include original request and headers in the data
     data["proxy_server_request"] = {
         "url": str(request.url),
         "method": request.method,
-        "headers": dict(request.headers),
+        "headers": _headers,
         "body": copy.copy(data),  # use copy instead of deepcopy
     }

+    ## Forward any LLM API Provider specific headers in extra_headers
+    add_provider_specific_headers_to_request(data=data, headers=_headers)
+
     ## Cache Controls
     headers = request.headers
     verbose_proxy_logger.debug("Request Headers: %s", headers)
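For context, the headers snapshot taken here feeds both the logged proxy_server_request and the new header-forwarding step. A rough illustration of the captured structure (all values hypothetical, not from the commit):

    # illustrative only: approximate shape of data["proxy_server_request"]
    proxy_server_request = {
        "url": "http://0.0.0.0:4000/chat/completions",  # hypothetical proxy URL
        "method": "POST",
        "headers": {
            "authorization": "Bearer sk-1234",  # hypothetical client key
            "anthropic-beta": "prompt-caching-2024-07-31",
        },
        "body": {"model": "anthropic/claude-3-5-sonnet-20240620", "messages": []},
    }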
@@ -306,6 +311,31 @@ async def add_litellm_data_to_request(
     return data


+def add_provider_specific_headers_to_request(
+    data: dict,
+    headers: dict,
+):
+    ANTHROPIC_API_HEADERS = [
+        "anthropic-version",
+        "anthropic-beta",
+    ]
+
+    extra_headers = data.get("extra_headers", {}) or {}
+
+    # boolean to indicate if a header was added
+    added_header = False
+    for header in ANTHROPIC_API_HEADERS:
+        if header in headers:
+            header_value = headers[header]
+            extra_headers.update({header: header_value})
+            added_header = True
+
+    if added_header is True:
+        data["extra_headers"] = extra_headers
+
+    return
+
+
 def _add_otel_traceparent_to_data(data: dict, request: Request):
     from litellm.proxy.proxy_server import open_telemetry_logger
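To see what the new helper does end to end, here is a minimal, self-contained sketch: the function body mirrors the diff above, while the sample request data and headers are invented for illustration. Note that extra_headers is only written back when at least one recognized Anthropic header is present, so unrelated headers (e.g. content-type) are never forwarded.

    # function body copied from the diff above
    def add_provider_specific_headers_to_request(data: dict, headers: dict):
        ANTHROPIC_API_HEADERS = ["anthropic-version", "anthropic-beta"]
        extra_headers = data.get("extra_headers", {}) or {}
        added_header = False
        for header in ANTHROPIC_API_HEADERS:
            if header in headers:
                extra_headers.update({header: headers[header]})
                added_header = True
        if added_header is True:
            data["extra_headers"] = extra_headers

    # hypothetical inbound request: one Anthropic header, one unrelated header
    data = {"model": "anthropic/claude-3-5-sonnet-20240620"}
    headers = {"anthropic-beta": "prompt-caching-2024-07-31", "content-type": "application/json"}
    add_provider_specific_headers_to_request(data=data, headers=headers)
    print(data["extra_headers"])  # {'anthropic-beta': 'prompt-caching-2024-07-31'}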
litellm/proxy/tests/test_anthropic_context_caching.py (new file, 37 lines)

@@ -0,0 +1,37 @@
+import openai
+
+client = openai.OpenAI(
+    api_key="sk-1234",  # litellm proxy api key
+    base_url="http://0.0.0.0:4000",  # litellm proxy base url
+)
+
+
+response = client.chat.completions.create(
+    model="anthropic/claude-3-5-sonnet-20240620",
+    messages=[
+        {  # type: ignore
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant tasked with analyzing legal documents.",
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the full text of a complex legal agreement" * 100,
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": "what are the key terms and conditions in this agreement?",
+        },
+    ],
+    extra_headers={
+        "anthropic-version": "2023-06-01",
+        "anthropic-beta": "prompt-caching-2024-07-31",
+    },
+)
+
+print(response)
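A quick smoke test for the caching path (not part of the commit): send the identical request twice, since Anthropic's prompt cache keys on the exact prefix, so only an exact repeat can be served from cache. The sketch assumes the request arguments from the script above have been factored into messages and extra_headers variables.

    # hedged sketch, not in the commit: compare token usage across two
    # identical calls; cache-hit accounting details are Anthropic-specific
    request_kwargs = dict(
        model="anthropic/claude-3-5-sonnet-20240620",
        messages=messages,            # assumption: same messages list as above
        extra_headers=extra_headers,  # assumption: same headers dict as above
    )
    first = client.chat.completions.create(**request_kwargs)
    second = client.chat.completions.create(**request_kwargs)
    print(first.usage)
    print(second.usage)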