fix using prompt caching on proxy

Ishaan Jaff 2024-08-15 20:12:11 -07:00
parent 42c2290a77
commit 6cb3675a06
2 changed files with 68 additions and 1 deletion


@@ -126,14 +126,19 @@ async def add_litellm_data_to_request(
     safe_add_api_version_from_query_params(data, request)

+    _headers = dict(request.headers)
+
     # Include original request and headers in the data
     data["proxy_server_request"] = {
         "url": str(request.url),
         "method": request.method,
-        "headers": dict(request.headers),
+        "headers": _headers,
         "body": copy.copy(data),  # use copy instead of deepcopy
     }

+    ## Forward any LLM API Provider specific headers in extra_headers
+    add_provider_specific_headers_to_request(data=data, headers=_headers)
+
     ## Cache Controls
     headers = request.headers
     verbose_proxy_logger.debug("Request Headers: %s", headers)
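
Note on the hunk above (editor annotation, not part of the diff): forwarding provider headers into extra_headers makes the downstream litellm call behave as if the caller had passed them directly. A minimal sketch of that effect at the SDK level, where the model name and header value are illustrative assumptions rather than part of this commit:

import litellm

# extra_headers is passed through to the provider HTTP request, so
# Anthropic sees the beta flag it requires for prompt caching.
response = litellm.completion(
    model="claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "Hello"}],
    extra_headers={"anthropic-beta": "prompt-caching-2024-07-31"},
)
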
@@ -306,6 +311,31 @@ async def add_litellm_data_to_request(
     return data


+def add_provider_specific_headers_to_request(
+    data: dict,
+    headers: dict,
+):
+    ANTHROPIC_API_HEADERS = [
+        "anthropic-version",
+        "anthropic-beta",
+    ]
+
+    extra_headers = data.get("extra_headers", {}) or {}
+
+    # boolean to indicate if a header was added
+    added_header = False
+    for header in ANTHROPIC_API_HEADERS:
+        if header in headers:
+            header_value = headers[header]
+            extra_headers.update({header: header_value})
+            added_header = True
+
+    if added_header is True:
+        data["extra_headers"] = extra_headers
+
+    return
+
+
 def _add_otel_traceparent_to_data(data: dict, request: Request):
     from litellm.proxy.proxy_server import open_telemetry_logger
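
With add_provider_specific_headers_to_request in place, anthropic-beta and anthropic-version headers sent to the proxy now reach Anthropic, which is what makes prompt caching work end to end. A minimal client-side sketch, assuming a proxy at http://localhost:4000 with a placeholder API key and model alias:

import openai

# Point the OpenAI SDK at the LiteLLM proxy (URL and key are placeholders).
client = openai.OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "<large, reusable context to cache>",
                    # cache_control marks this block for Anthropic prompt caching
                    "cache_control": {"type": "ephemeral"},
                }
            ],
        }
    ],
    # Forwarded to Anthropic by add_provider_specific_headers_to_request
    extra_headers={"anthropic-beta": "prompt-caching-2024-07-31"},
)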