fix(litellm_pre_call_utils.py): add support for key level caching params

Krrish Dholakia 2024-06-07 22:07:55 -07:00
parent c5a611ca91
commit af1ae80277
4 changed files with 42 additions and 2 deletions


@@ -283,7 +283,7 @@ litellm_settings:
### Turn on / off caching per request.
The proxy supports 3 cache-controls:
The proxy supports 4 cache-controls:
- `ttl`: *Optional(int)* - Will cache the response for the user-defined amount of time (in seconds).
- `s-maxage`: *Optional(int)* - Will only accept cached responses that are within the user-defined range (in seconds).
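As a quick illustration of these controls, here is a minimal Python sketch that sets `ttl` on a single request through the proxy, following the OpenAI-SDK pattern the doc's own example (below) uses; the base URL, API key, and TTL value are placeholders, not values from this commit.

```python
# Minimal sketch: set a per-request cache TTL through the LiteLLM proxy.
# base_url, api_key, and the ttl value are placeholders.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
    extra_body={"cache": {"ttl": 300}},  # cache this response for 300 seconds
)
```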
@@ -374,6 +374,33 @@ chat_completion = client.chat.completions.create(
)
```
### Turn on / off caching per Key.
1. Add cache params when creating a key ([full list](#turn-on--off-caching-per-key))
```bash
curl -X POST 'http://0.0.0.0:4000/key/generate' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
    "user_id": "222",
    "metadata": {
        "cache": {
            "no-cache": true
        }
    }
}'
```
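If you are scripting this step, a rough Python equivalent of the curl above; the assumption that the response exposes the new key under a `key` field is not confirmed by this diff.

```python
# Sketch: create a key with key-level cache params via /key/generate.
# Assumes the proxy runs locally and the response JSON carries the new key
# in a "key" field (an assumption).
import requests

resp = requests.post(
    "http://0.0.0.0:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "user_id": "222",
        "metadata": {"cache": {"no-cache": True}},
    },
)
new_key = resp.json().get("key")
print(new_key)
```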
2. Test it!
```bash
curl -X POST 'http://localhost:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer <YOUR_NEW_KEY>' \
-d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "bom dia"}]}'
```
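The same test via the OpenAI Python SDK, for readers calling the proxy from Python; `<YOUR_NEW_KEY>` is the key returned in step 1.

```python
# Sketch: call the proxy with the newly generated key; its "no-cache" metadata
# should now be applied to this request automatically.
import openai

client = openai.OpenAI(api_key="<YOUR_NEW_KEY>", base_url="http://localhost:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "bom dia"}],
)
print(response.choices[0].message.content)
```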
### Deleting Cache Keys - `/cache/delete`
To delete a cache key, send a request to `/cache/delete` with the `keys` you want to delete.
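Based on that description, a minimal sketch of such a request; the payload field name `keys` follows the sentence above, and the key values are placeholders, not values from this diff.

```python
# Sketch: delete specific cache entries via the proxy's /cache/delete endpoint.
# The "keys" field name follows the doc sentence above; key values are placeholders.
import requests

resp = requests.post(
    "http://0.0.0.0:4000/cache/delete",
    headers={"Authorization": "Bearer sk-1234"},
    json={"keys": ["<cache-key-1>", "<cache-key-2>"]},
)
print(resp.status_code, resp.text)
```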


@@ -27,7 +27,7 @@ class _PROXY_CacheControlCheck(CustomLogger):
        call_type: str,
    ):
        try:
            self.print_verbose(f"Inside Cache Control Check Pre-Call Hook")
            self.print_verbose("Inside Cache Control Check Pre-Call Hook")
            allowed_cache_controls = user_api_key_dict.allowed_cache_controls
            if data.get("cache", None) is None:


@@ -3,6 +3,7 @@ from fastapi import Request
from typing import Any, Dict, Optional, TYPE_CHECKING
from litellm.proxy._types import UserAPIKeyAuth
from litellm._logging import verbose_proxy_logger, verbose_logger
from litellm.types.utils import SupportedCacheControls
if TYPE_CHECKING:
    from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig
@@ -68,6 +69,15 @@ async def add_litellm_data_to_request(
        cache_dict = parse_cache_control(cache_control_header)
        data["ttl"] = cache_dict.get("s-maxage")

    ### KEY-LEVEL CACHING
    key_metadata = user_api_key_dict.metadata
    if "cache" in key_metadata:
        data["cache"] = {}
        if isinstance(key_metadata["cache"], dict):
            for k, v in key_metadata["cache"].items():
                if k in SupportedCacheControls:
                    data["cache"][k] = v

    verbose_proxy_logger.debug("receiving data: %s", data)
    # users can pass in 'user' param to /chat/completions. Don't override it
    if data.get("user", None) is None and user_api_key_dict.user_id is not None:
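To see the new key-level merge in isolation, here is a standalone sketch of the same logic; the helper name is hypothetical, and the filtering mirrors the block added above.

```python
# Standalone sketch of the key-level cache merge added above: only cache
# controls listed in SupportedCacheControls are copied from the key's
# metadata into the request body. Helper name is hypothetical.
SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]


def merge_key_cache_params(key_metadata: dict, data: dict) -> dict:
    if "cache" in key_metadata:
        data["cache"] = {}
        if isinstance(key_metadata["cache"], dict):
            for k, v in key_metadata["cache"].items():
                if k in SupportedCacheControls:
                    data["cache"][k] = v
    return data


# Unsupported controls are dropped, supported ones pass through:
print(merge_key_cache_params({"cache": {"no-cache": True, "foo": 1}}, {}))
# -> {'cache': {'no-cache': True}}
```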


@@ -7,6 +7,9 @@ class LiteLLMCommonStrings(Enum):
    redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"

SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]

class CostPerToken(TypedDict):
    input_cost_per_token: float
    output_cost_per_token: float