fix using prompt caching on proxy

Ishaan Jaff 2024-08-15 20:12:11 -07:00
parent 42c2290a77
commit 6cb3675a06
2 changed files with 68 additions and 1 deletion


@@ -126,14 +126,19 @@ async def add_litellm_data_to_request(
     safe_add_api_version_from_query_params(data, request)
 
+    _headers = dict(request.headers)
+
     # Include original request and headers in the data
     data["proxy_server_request"] = {
         "url": str(request.url),
         "method": request.method,
-        "headers": dict(request.headers),
+        "headers": _headers,
         "body": copy.copy(data),  # use copy instead of deepcopy
     }
 
+    ## Forward any LLM API Provider specific headers in extra_headers
+    add_provider_specific_headers_to_request(data=data, headers=_headers)
+
     ## Cache Controls
     headers = request.headers
     verbose_proxy_logger.debug("Request Headers: %s", headers)
@@ -306,6 +311,31 @@ async def add_litellm_data_to_request(
     return data
 
 
+def add_provider_specific_headers_to_request(
+    data: dict,
+    headers: dict,
+):
+    ANTHROPIC_API_HEADERS = [
+        "anthropic-version",
+        "anthropic-beta",
+    ]
+
+    extra_headers = data.get("extra_headers", {}) or {}
+
+    # boolean to indicate if a header was added
+    added_header = False
+    for header in ANTHROPIC_API_HEADERS:
+        if header in headers:
+            header_value = headers[header]
+            extra_headers.update({header: header_value})
+            added_header = True
+
+    if added_header is True:
+        data["extra_headers"] = extra_headers
+
+    return
+
+
 def _add_otel_traceparent_to_data(data: dict, request: Request):
     from litellm.proxy.proxy_server import open_telemetry_logger
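
For illustration, here is a minimal standalone sketch of the helper's behavior (a hypothetical reproduction for demonstration, not additional code from the commit; the "x-team" header is an invented example): recognized Anthropic headers from the incoming request are merged into any existing extra_headers, while unrelated headers such as authorization are not forwarded.

# Hypothetical standalone reproduction of the helper above, for demonstration.
def add_provider_specific_headers_to_request(data: dict, headers: dict):
    ANTHROPIC_API_HEADERS = ["anthropic-version", "anthropic-beta"]
    extra_headers = data.get("extra_headers", {}) or {}
    added_header = False
    for header in ANTHROPIC_API_HEADERS:
        if header in headers:
            extra_headers.update({header: headers[header]})
            added_header = True
    if added_header:
        data["extra_headers"] = extra_headers

data = {"model": "claude-3-5-sonnet-20240620", "extra_headers": {"x-team": "legal"}}
incoming = {
    "authorization": "Bearer sk-1234",
    "anthropic-beta": "prompt-caching-2024-07-31",
}
add_provider_specific_headers_to_request(data=data, headers=incoming)
# Only the recognized Anthropic header is merged in; authorization is not.
assert data["extra_headers"] == {
    "x-team": "legal",
    "anthropic-beta": "prompt-caching-2024-07-31",
}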


@@ -0,0 +1,37 @@
+import openai
+
+client = openai.OpenAI(
+    api_key="sk-1234",  # litellm proxy api key
+    base_url="http://0.0.0.0:4000",  # litellm proxy base url
+)
+
+response = client.chat.completions.create(
+    model="anthropic/claude-3-5-sonnet-20240620",
+    messages=[
+        {  # type: ignore
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant tasked with analyzing legal documents.",
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the full text of a complex legal agreement" * 100,
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": "what are the key terms and conditions in this agreement?",
+        },
+    ],
+    extra_headers={
+        "anthropic-version": "2023-06-01",
+        "anthropic-beta": "prompt-caching-2024-07-31",
+    },
+)
+
+print(response)
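
A quick way to sanity-check the cache (a sketch under assumptions: the client and payload from the example above are already defined, and the proxy surfaces the provider's usage accounting on response.usage) is to send the identical request twice and compare latency and usage:

import time

def send_request():
    # Identical payload to the example above; Anthropic caches the large
    # system block marked with cache_control on the first call.
    return client.chat.completions.create(
        model="anthropic/claude-3-5-sonnet-20240620",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "You are an AI assistant tasked with analyzing legal documents.",
                    },
                    {
                        "type": "text",
                        "text": "Here is the full text of a complex legal agreement" * 100,
                        "cache_control": {"type": "ephemeral"},
                    },
                ],
            },
            {
                "role": "user",
                "content": "what are the key terms and conditions in this agreement?",
            },
        ],
        extra_headers={
            "anthropic-version": "2023-06-01",
            "anthropic-beta": "prompt-caching-2024-07-31",
        },
    )

for attempt in (1, 2):
    start = time.time()
    resp = send_request()
    # The second call should be noticeably faster if the cached prefix was
    # reused; whether cache read counts appear in usage depends on how the
    # proxy maps Anthropic's usage fields in your litellm version.
    print(f"attempt {attempt}: {time.time() - start:.2f}s, usage={resp.usage}")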