From 22b51c5af48f9a090cb4a215f912d449f3638c5e Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 7 Jun 2024 22:07:55 -0700
Subject: [PATCH] fix(litellm_pre_call_utils.py): add support for key level caching params

---
 docs/my-website/docs/proxy/caching.md      | 29 ++++++++++++++++++++++++++++-
 litellm/proxy/hooks/cache_control_check.py |  2 +-
 litellm/proxy/litellm_pre_call_utils.py    | 10 ++++++++++
 litellm/types/utils.py                     |  3 +++
 4 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/docs/my-website/docs/proxy/caching.md b/docs/my-website/docs/proxy/caching.md
index 15b1921b0e..25fb4ce34a 100644
--- a/docs/my-website/docs/proxy/caching.md
+++ b/docs/my-website/docs/proxy/caching.md
@@ -283,7 +283,7 @@ litellm_settings:
 
 ### Turn on / off caching per request.
 
-The proxy support 3 cache-controls:
+The proxy supports 4 cache-controls:
 
 - `ttl`: *Optional(int)* - Will cache the response for the user-defined amount of time (in seconds).
 - `s-maxage`: *Optional(int)* Will only accept cached responses that are within user-defined range (in seconds).
@@ -374,6 +374,33 @@ chat_completion = client.chat.completions.create(
 )
 ```
 
+### Turn on / off caching per Key.
+
+1. Add cache params when creating a key ([full list](#turn-on--off-caching-per-request))
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{
+    "user_id": "222",
+    "metadata": {
+        "cache": {
+            "no-cache": true
+        }
+    }
+}'
+```
+
+2. Test it!
+
+```bash
+curl -X POST 'http://localhost:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer <key-from-step-1>' \
+-d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "bom dia"}]}'
+```
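+
+The same test can be run from the OpenAI Python SDK pointed at the proxy. A minimal sketch (substitute the key generated in step 1; the base URL assumes a locally running proxy):
+
+```python
+import openai
+
+# Point the OpenAI client at the LiteLLM proxy; the API key below is a
+# placeholder for the key generated in step 1
+client = openai.OpenAI(
+    api_key="<key-from-step-1>",
+    base_url="http://localhost:4000",
+)
+
+# Because the key was created with {"cache": {"no-cache": true}}, the proxy
+# should skip the cache for this request
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "bom dia"}],
+)
+print(response)
+```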
+
 ### Deleting Cache Keys - `/cache/delete`
 
 In order to delete a cache key, send a request to `/cache/delete` with the `keys` you want to delete
diff --git a/litellm/proxy/hooks/cache_control_check.py b/litellm/proxy/hooks/cache_control_check.py
index 89971a0bf7..238e2e6ab7 100644
--- a/litellm/proxy/hooks/cache_control_check.py
+++ b/litellm/proxy/hooks/cache_control_check.py
@@ -27,7 +27,7 @@ class _PROXY_CacheControlCheck(CustomLogger):
         call_type: str,
     ):
         try:
-            self.print_verbose(f"Inside Cache Control Check Pre-Call Hook")
+            self.print_verbose("Inside Cache Control Check Pre-Call Hook")
             allowed_cache_controls = user_api_key_dict.allowed_cache_controls
 
             if data.get("cache", None) is None:
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 945799b4cf..089d469af9 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -3,6 +3,7 @@ from fastapi import Request
 from typing import Any, Dict, Optional, TYPE_CHECKING
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm._logging import verbose_proxy_logger, verbose_logger
+from litellm.types.utils import SupportedCacheControls
 
 if TYPE_CHECKING:
     from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig
@@ -68,6 +69,15 @@ async def add_litellm_data_to_request(
         cache_dict = parse_cache_control(cache_control_header)
         data["ttl"] = cache_dict.get("s-maxage")
 
+    ### KEY-LEVEL CACHING
+    key_metadata = user_api_key_dict.metadata
+    if "cache" in key_metadata:
+        data["cache"] = {}
+        if isinstance(key_metadata["cache"], dict):
+            for k, v in key_metadata["cache"].items():
+                if k in SupportedCacheControls:
+                    data["cache"][k] = v
+
     verbose_proxy_logger.debug("receiving data: %s", data)
     # users can pass in 'user' param to /chat/completions. Don't override it
     if data.get("user", None) is None and user_api_key_dict.user_id is not None:
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index f16d9567fa..2b6aefcf59 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -7,6 +7,9 @@ class LiteLLMCommonStrings(Enum):
     redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"
 
 
+SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]
+
+
 class CostPerToken(TypedDict):
     input_cost_per_token: float
     output_cost_per_token: float
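
A minimal standalone sketch of the key-level merge step this patch adds to `add_litellm_data_to_request` (the helper name `apply_key_level_cache_params` is hypothetical; `SupportedCacheControls` is the list the patch adds in `litellm/types/utils.py`):

```python
SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]


def apply_key_level_cache_params(data: dict, key_metadata: dict) -> dict:
    """Hypothetical helper mirroring the patch: copy only recognized
    cache controls from the key's metadata onto the request body."""
    if "cache" in key_metadata:
        # Key-level settings take precedence, so start from a fresh dict
        data["cache"] = {}
        if isinstance(key_metadata["cache"], dict):
            for k, v in key_metadata["cache"].items():
                if k in SupportedCacheControls:
                    data["cache"][k] = v
    return data


# Unrecognized controls are dropped: prints {'cache': {'no-cache': True}}
print(apply_key_level_cache_params({}, {"cache": {"no-cache": True, "bogus": 1}}))
```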