forked from phoenix/litellm-mirror

fix(litellm_pre_call_utils.py): add support for key level caching params

parent c5a611ca91
commit af1ae80277

4 changed files with 42 additions and 2 deletions
@@ -283,7 +283,7 @@ litellm_settings:
 
 ### Turn on / off caching per request.
 
-The proxy support 3 cache-controls:
+The proxy supports 4 cache-controls:
 
 - `ttl`: *Optional(int)* - Will cache the response for the user-defined amount of time (in seconds).
 - `s-maxage`: *Optional(int)* Will only accept cached responses that are within user-defined range (in seconds).
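A note on the documentation change above: the request-level `cache` object and the `ttl` / `s-maxage` / `no-cache` / `no-store` names come from this diff, but the snippet below is only a sketch of how a caller might pass them through the OpenAI Python SDK's `extra_body` parameter; the proxy URL, key, model, and TTL values are placeholders.

```python
# Sketch: passing per-request cache-control params to the proxy.
# Assumes the proxy forwards extra request fields (here, "cache") as-is;
# base_url, api_key, model, and the ttl value are placeholders.
import openai

client = openai.OpenAI(
    api_key="sk-1234",
    base_url="http://0.0.0.0:4000",
)

chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "bom dia"}],
    extra_body={
        "cache": {
            "ttl": 600,       # cache this response for 600 seconds
            "s-maxage": 600,  # only accept cached responses at most 600 seconds old
        }
    },
)
```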
@@ -374,6 +374,33 @@ chat_completion = client.chat.completions.create(
 )
 ```
 
+### Turn on / off caching per Key.
+
+1. Add cache params when creating a key [full list](#turn-on--off-caching-per-key)
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{
+    "user_id": "222",
+    "metadata": {
+        "cache": {
+            "no-cache": true
+        }
+    }
+}'
+```
+
+2. Test it!
+
+```bash
+curl -X POST 'http://localhost:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer <YOUR_NEW_KEY>' \
+-d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "bom dia"}]}'
+```
+
 ### Deleting Cache Keys - `/cache/delete`
 In order to delete a cache key, send a request to `/cache/delete` with the `keys` you want to delete
 
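For readers who prefer Python over curl, here is a rough equivalent of the two documented steps using the `requests` library. It assumes `/key/generate` returns the generated key in a `"key"` field of the JSON response; the master key, user id, and model are the same placeholder values used in the docs above.

```python
# Sketch: generate a key with cache params in its metadata, then call the proxy with it.
# Assumption: the /key/generate response exposes the generated key as "key".
import requests

resp = requests.post(
    "http://0.0.0.0:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234"},
    json={"user_id": "222", "metadata": {"cache": {"no-cache": True}}},
)
new_key = resp.json()["key"]

chat = requests.post(
    "http://0.0.0.0:4000/chat/completions",
    headers={"Authorization": f"Bearer {new_key}"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "bom dia"}],
    },
)
print(chat.json())
```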
@@ -27,7 +27,7 @@ class _PROXY_CacheControlCheck(CustomLogger):
         call_type: str,
     ):
         try:
-            self.print_verbose(f"Inside Cache Control Check Pre-Call Hook")
+            self.print_verbose("Inside Cache Control Check Pre-Call Hook")
             allowed_cache_controls = user_api_key_dict.allowed_cache_controls
 
             if data.get("cache", None) is None:
@@ -3,6 +3,7 @@ from fastapi import Request
 from typing import Any, Dict, Optional, TYPE_CHECKING
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm._logging import verbose_proxy_logger, verbose_logger
+from litellm.types.utils import SupportedCacheControls
 
 if TYPE_CHECKING:
     from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig
@@ -68,6 +69,15 @@ async def add_litellm_data_to_request(
         cache_dict = parse_cache_control(cache_control_header)
         data["ttl"] = cache_dict.get("s-maxage")
 
+    ### KEY-LEVEL CACHING
+    key_metadata = user_api_key_dict.metadata
+    if "cache" in key_metadata:
+        data["cache"] = {}
+        if isinstance(key_metadata["cache"], dict):
+            for k, v in key_metadata["cache"].items():
+                if k in SupportedCacheControls:
+                    data["cache"][k] = v
+
     verbose_proxy_logger.debug("receiving data: %s", data)
     # users can pass in 'user' param to /chat/completions. Don't override it
     if data.get("user", None) is None and user_api_key_dict.user_id is not None:
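To make the new block's behaviour concrete, here is a standalone restatement of it: cache params stored in the key's metadata are copied onto the request body, but only those whose names appear in `SupportedCacheControls`. The function name and example values below are illustrative, not part of the patch.

```python
# Standalone sketch of the key-level cache merge added above.
# merge_key_cache_params is an illustrative name, not a litellm API.
SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]


def merge_key_cache_params(data: dict, key_metadata: dict) -> dict:
    """Copy supported cache controls from key metadata into the request body."""
    if "cache" in key_metadata:
        data["cache"] = {}
        if isinstance(key_metadata["cache"], dict):
            for k, v in key_metadata["cache"].items():
                if k in SupportedCacheControls:
                    data["cache"][k] = v
    return data


# Example: "no-cache" is kept, an unrecognised control is dropped.
body = {"model": "gpt-3.5-turbo", "messages": []}
print(merge_key_cache_params(body, {"cache": {"no-cache": True, "bogus": 1}}))
# -> {'model': 'gpt-3.5-turbo', 'messages': [], 'cache': {'no-cache': True}}
```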
@@ -7,6 +7,9 @@ class LiteLLMCommonStrings(Enum):
     redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"
 
 
+SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]
+
+
 class CostPerToken(TypedDict):
     input_cost_per_token: float
     output_cost_per_token: float