From 7e1d5c81b4633c13e60e1439f50b85320f561f8b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 4 Apr 2024 11:00:00 -0700 Subject: [PATCH] return cache key in streaming responses --- litellm/proxy/proxy_server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 75e9a4525a..25c6cd7361 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -3440,7 +3440,10 @@ async def chat_completion( if ( "stream" in data and data["stream"] == True ): # use generate_responses to stream responses - custom_headers = {"x-litellm-model-id": model_id} + custom_headers = { + "x-litellm-model-id": model_id, + "x-litellm-cache-key": cache_key, + } selected_data_generator = select_data_generator( response=response, user_api_key_dict=user_api_key_dict )