diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index a69f28d10..e0668f466 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3432,10 +3432,10 @@ async def chat_completion(
         # Post Call Processing
         data["litellm_status"] = "success"  # used for alerting
-        if hasattr(response, "_hidden_params"):
-            model_id = response._hidden_params.get("model_id", None) or ""
-        else:
-            model_id = ""
+
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
 
         if (
             "stream" in data
             and data["stream"] == True
@@ -3451,6 +3451,7 @@ async def chat_completion(
             )
 
         fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
 
         ### CALL HOOKS ### - modify outgoing data
         response = await proxy_logging_obj.post_call_success_hook(
diff --git a/litellm/utils.py b/litellm/utils.py
index e92f2068a..17a31751d 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3132,6 +3132,8 @@ def client(original_function):
                     target=logging_obj.success_handler,
                     args=(cached_result, start_time, end_time, cache_hit),
                 ).start()
+                cache_key = kwargs.get("preset_cache_key", None)
+                cached_result._hidden_params["cache_key"] = cache_key
                 return cached_result
         elif (
             call_type == CallTypes.aembedding.value