From 22b51c5af48f9a090cb4a215f912d449f3638c5e Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 7 Jun 2024 22:07:55 -0700
Subject: [PATCH] fix(litellm_pre_call_utils.py): add support for key level caching params

---
 docs/my-website/docs/proxy/caching.md      | 29 ++++++++++++++++++++++++++++-
 litellm/proxy/hooks/cache_control_check.py |  2 +-
 litellm/proxy/litellm_pre_call_utils.py    | 10 ++++++++++
 litellm/types/utils.py                     |  3 +++
 4 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/docs/my-website/docs/proxy/caching.md b/docs/my-website/docs/proxy/caching.md
index 15b1921b0e..25fb4ce34a 100644
--- a/docs/my-website/docs/proxy/caching.md
+++ b/docs/my-website/docs/proxy/caching.md
@@ -283,7 +283,7 @@ litellm_settings:
 
 ### Turn on / off caching per request.
 
-The proxy support 3 cache-controls:
+The proxy supports 4 cache-controls:
 
 - `ttl`: *Optional(int)* - Will cache the response for the user-defined amount of time (in seconds).
 - `s-maxage`: *Optional(int)* Will only accept cached responses that are within user-defined range (in seconds).
@@ -374,6 +374,33 @@ chat_completion = client.chat.completions.create(
 )
 ```
 
+### Turn on / off caching per Key.
+
+1. Add cache params when creating a key ([full list](#turn-on--off-caching-per-request))
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{
+    "user_id": "222",
+    "metadata": {
+        "cache": {
+            "no-cache": true
+        }
+    }
+}'
+```
+
+2. Test it!
+
+```bash
+curl -X POST 'http://localhost:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer <key-from-step-1>' \
+-d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "bom dia"}]}'
+```
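+
+The same test can be run from the OpenAI Python SDK pointed at the proxy. A minimal sketch (substitute the key generated in step 1; the base URL assumes a locally running proxy):
+
+```python
+import openai
+
+# Point the OpenAI client at the LiteLLM proxy; the API key below is a
+# placeholder for the key generated in step 1
+client = openai.OpenAI(
+    api_key="<key-from-step-1>",
+    base_url="http://localhost:4000",
+)
+
+# Because the key was created with {"cache": {"no-cache": true}}, the proxy
+# should skip the cache for this request
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "bom dia"}],
+)
+print(response)
+```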
+
 ### Deleting Cache Keys - `/cache/delete`
 
 In order to delete a cache key, send a request to `/cache/delete` with the `keys` you want to delete
diff --git a/litellm/proxy/hooks/cache_control_check.py b/litellm/proxy/hooks/cache_control_check.py
index 89971a0bf7..238e2e6ab7 100644
--- a/litellm/proxy/hooks/cache_control_check.py
+++ b/litellm/proxy/hooks/cache_control_check.py
@@ -27,7 +27,7 @@ class _PROXY_CacheControlCheck(CustomLogger):
         call_type: str,
     ):
         try:
-            self.print_verbose(f"Inside Cache Control Check Pre-Call Hook")
+            self.print_verbose("Inside Cache Control Check Pre-Call Hook")
             allowed_cache_controls = user_api_key_dict.allowed_cache_controls
 
             if data.get("cache", None) is None:
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 945799b4cf..089d469af9 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -3,6 +3,7 @@ from fastapi import Request
 from typing import Any, Dict, Optional, TYPE_CHECKING
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm._logging import verbose_proxy_logger, verbose_logger
+from litellm.types.utils import SupportedCacheControls
 
 if TYPE_CHECKING:
     from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig
@@ -68,6 +69,15 @@ async def add_litellm_data_to_request(
         cache_dict = parse_cache_control(cache_control_header)
         data["ttl"] = cache_dict.get("s-maxage")
 
+    ### KEY-LEVEL CACHING
+    key_metadata = user_api_key_dict.metadata
+    if "cache" in key_metadata:
+        data["cache"] = {}
+        if isinstance(key_metadata["cache"], dict):
+            for k, v in key_metadata["cache"].items():
+                if k in SupportedCacheControls:
+                    data["cache"][k] = v
+
     verbose_proxy_logger.debug("receiving data: %s", data)
     # users can pass in 'user' param to /chat/completions. Don't override it
     if data.get("user", None) is None and user_api_key_dict.user_id is not None:
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index f16d9567fa..2b6aefcf59 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -7,6 +7,9 @@ class LiteLLMCommonStrings(Enum):
     redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"
 
 
+SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]
+
+
 class CostPerToken(TypedDict):
     input_cost_per_token: float
     output_cost_per_token: float
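
A minimal standalone sketch of the key-level merge step this patch adds to `add_litellm_data_to_request` (the helper name `apply_key_level_cache_params` is hypothetical; `SupportedCacheControls` is the list the patch adds in `litellm/types/utils.py`):

```python
SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]


def apply_key_level_cache_params(data: dict, key_metadata: dict) -> dict:
    """Hypothetical helper mirroring the patch: copy only recognized
    cache controls from the key's metadata onto the request body."""
    if "cache" in key_metadata:
        # Key-level settings take precedence, so start from a fresh dict
        data["cache"] = {}
        if isinstance(key_metadata["cache"], dict):
            for k, v in key_metadata["cache"].items():
                if k in SupportedCacheControls:
                    data["cache"][k] = v
    return data


# Unrecognized controls are dropped: prints {'cache': {'no-cache': True}}
print(apply_key_level_cache_params({}, {"cache": {"no-cache": True, "bogus": 1}}))
```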