diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 75e9a4525a..25c6cd7361 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -3440,7 +3440,10 @@ async def chat_completion( if ( "stream" in data and data["stream"] == True ): # use generate_responses to stream responses - custom_headers = {"x-litellm-model-id": model_id} + custom_headers = { + "x-litellm-model-id": model_id, + "x-litellm-cache-key": cache_key, + } selected_data_generator = select_data_generator( response=response, user_api_key_dict=user_api_key_dict )