forked from phoenix/litellm-mirror
fix(litellm_pre_call_utils.py): add support for key level caching params
This commit is contained in:
parent c5a611ca91
commit af1ae80277
4 changed files with 42 additions and 2 deletions
@@ -283,7 +283,7 @@ litellm_settings:
 ### Turn on / off caching per request.

-The proxy support 3 cache-controls:
+The proxy support 4 cache-controls:

 - `ttl`: *Optional(int)* - Will cache the response for the user-defined amount of time (in seconds).
 - `s-maxage`: *Optional(int)* Will only accept cached responses that are within user-defined range (in seconds).

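For orientation only (not part of this commit's diff): the cache-controls above are sent in the request body under a `cache` key, which is what the proxy-side code later in this commit reads. Below is a minimal sketch, assuming the OpenAI Python client pointed at a LiteLLM proxy on `localhost:4000`; the exact `extra_body` shape is an assumption inferred from the docs context around this hunk.

```python
# Illustrative sketch (not part of this commit): per-request cache controls
# passed via the OpenAI client's extra_body. Assumes a LiteLLM proxy running
# on localhost:4000 and a valid proxy key.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "bom dia"}],
    # "ttl" caches this response for 300 seconds; "no-cache" would skip the cache instead.
    extra_body={"cache": {"ttl": 300}},
)
print(chat_completion.choices[0].message.content)
```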
@@ -374,6 +374,33 @@ chat_completion = client.chat.completions.create(
 )
 ```

+### Turn on / off caching per Key.
+
+1. Add cache params when creating a key [full list](#turn-on--off-caching-per-key)
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{
+    "user_id": "222",
+    "metadata": {
+        "cache": {
+            "no-cache": true
+        }
+    }
+}'
+```
+
+2. Test it!
+
+```bash
+curl -X POST 'http://localhost:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer <YOUR_NEW_KEY>' \
+-d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "bom dia"}]}'
+```
+
 ### Deleting Cache Keys - `/cache/delete`
 In order to delete a cache key, send a request to `/cache/delete` with the `keys` you want to delete

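As a side note (not part of the diff): the same "Test it!" request can be made with the OpenAI Python client. The key-level cache settings are applied by the proxy from the key's metadata, so the client passes no cache flags itself; `<YOUR_NEW_KEY>` and the local URL are placeholders from the docs above.

```python
# Illustrative only, not part of this commit: Python equivalent of the curl "Test it!" call.
# The key is assumed to have been generated with metadata.cache = {"no-cache": true},
# so the proxy disables caching for this request without any client-side cache flags.
from openai import OpenAI

client = OpenAI(api_key="<YOUR_NEW_KEY>", base_url="http://localhost:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "bom dia"}],
)
print(response.choices[0].message.content)
```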
@@ -27,7 +27,7 @@ class _PROXY_CacheControlCheck(CustomLogger):
         call_type: str,
     ):
         try:
-            self.print_verbose(f"Inside Cache Control Check Pre-Call Hook")
+            self.print_verbose("Inside Cache Control Check Pre-Call Hook")
             allowed_cache_controls = user_api_key_dict.allowed_cache_controls

             if data.get("cache", None) is None:
@@ -3,6 +3,7 @@ from fastapi import Request
 from typing import Any, Dict, Optional, TYPE_CHECKING
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm._logging import verbose_proxy_logger, verbose_logger
+from litellm.types.utils import SupportedCacheControls

 if TYPE_CHECKING:
     from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig
@@ -68,6 +69,15 @@ async def add_litellm_data_to_request(
         cache_dict = parse_cache_control(cache_control_header)
         data["ttl"] = cache_dict.get("s-maxage")

+    ### KEY-LEVEL CACHING
+    key_metadata = user_api_key_dict.metadata
+    if "cache" in key_metadata:
+        data["cache"] = {}
+        if isinstance(key_metadata["cache"], dict):
+            for k, v in key_metadata["cache"].items():
+                if k in SupportedCacheControls:
+                    data["cache"][k] = v
+
     verbose_proxy_logger.debug("receiving data: %s", data)
     # users can pass in 'user' param to /chat/completions. Don't override it
     if data.get("user", None) is None and user_api_key_dict.user_id is not None:
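To make the added block's behavior concrete, here is a hypothetical, self-contained restatement (not part of this commit): only controls listed in `SupportedCacheControls` are copied from the key's metadata into the request body, and anything else is dropped. The helper name and the standalone list are illustrative; in the commit the logic lives inline in `add_litellm_data_to_request` and the list is imported from `litellm.types.utils`.

```python
# Hypothetical, self-contained restatement of the key-level caching block above;
# not part of this commit. Shows which metadata cache params survive the filter.
SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]

def apply_key_level_cache(data: dict, key_metadata: dict) -> dict:
    """Copy only supported cache controls from key metadata into the request body."""
    if "cache" in key_metadata:
        data["cache"] = {}
        if isinstance(key_metadata["cache"], dict):
            for k, v in key_metadata["cache"].items():
                if k in SupportedCacheControls:
                    data["cache"][k] = v
    return data

# Example: "no-cache" is kept, the unrecognized "foo" is dropped.
print(apply_key_level_cache({}, {"cache": {"no-cache": True, "foo": 1}}))
# -> {'cache': {'no-cache': True}}
```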
@@ -7,6 +7,9 @@ class LiteLLMCommonStrings(Enum):
     redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"


+SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]
+
+
 class CostPerToken(TypedDict):
     input_cost_per_token: float
     output_cost_per_token: float