forked from phoenix/litellm-mirror

fix(litellm_pre_call_utils.py): add support for key level caching params

parent c5a611ca91
commit af1ae80277

4 changed files with 42 additions and 2 deletions
@@ -283,7 +283,7 @@ litellm_settings:
 
 ### Turn on / off caching per request.
 
-The proxy support 3 cache-controls:
+The proxy supports 4 cache-controls:
 
 - `ttl`: *Optional(int)* - Will cache the response for the user-defined amount of time (in seconds).
 - `s-maxage`: *Optional(int)* Will only accept cached responses that are within user-defined range (in seconds).
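A note on the documentation change above: the request-level `cache` object and the `ttl` / `s-maxage` / `no-cache` / `no-store` names come from this diff, but the snippet below is only a sketch of how a caller might pass them through the OpenAI Python SDK's `extra_body` parameter; the proxy URL, key, model, and TTL values are placeholders.

```python
# Sketch: passing per-request cache-control params to the proxy.
# Assumes the proxy forwards extra request fields (here, "cache") as-is;
# base_url, api_key, model, and the ttl value are placeholders.
import openai

client = openai.OpenAI(
    api_key="sk-1234",
    base_url="http://0.0.0.0:4000",
)

chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "bom dia"}],
    extra_body={
        "cache": {
            "ttl": 600,       # cache this response for 600 seconds
            "s-maxage": 600,  # only accept cached responses at most 600 seconds old
        }
    },
)
```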
@@ -374,6 +374,33 @@ chat_completion = client.chat.completions.create(
 )
 ```
 
+### Turn on / off caching per Key.
+
+1. Add cache params when creating a key [full list](#turn-on--off-caching-per-key)
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{
+    "user_id": "222",
+    "metadata": {
+        "cache": {
+            "no-cache": true
+        }
+    }
+}'
+```
+
+2. Test it!
+
+```bash
+curl -X POST 'http://localhost:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer <YOUR_NEW_KEY>' \
+-d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "bom dia"}]}'
+```
+
 ### Deleting Cache Keys - `/cache/delete`
 In order to delete a cache key, send a request to `/cache/delete` with the `keys` you want to delete
 
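For readers who prefer Python over curl, here is a rough equivalent of the two documented steps using the `requests` library. It assumes `/key/generate` returns the generated key in a `"key"` field of the JSON response; the master key, user id, and model are the same placeholder values used in the docs above.

```python
# Sketch: generate a key with cache params in its metadata, then call the proxy with it.
# Assumption: the /key/generate response exposes the generated key as "key".
import requests

resp = requests.post(
    "http://0.0.0.0:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234"},
    json={"user_id": "222", "metadata": {"cache": {"no-cache": True}}},
)
new_key = resp.json()["key"]

chat = requests.post(
    "http://0.0.0.0:4000/chat/completions",
    headers={"Authorization": f"Bearer {new_key}"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "bom dia"}],
    },
)
print(chat.json())
```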
@@ -27,7 +27,7 @@ class _PROXY_CacheControlCheck(CustomLogger):
         call_type: str,
     ):
         try:
-            self.print_verbose(f"Inside Cache Control Check Pre-Call Hook")
+            self.print_verbose("Inside Cache Control Check Pre-Call Hook")
             allowed_cache_controls = user_api_key_dict.allowed_cache_controls
 
             if data.get("cache", None) is None:
@@ -3,6 +3,7 @@ from fastapi import Request
 from typing import Any, Dict, Optional, TYPE_CHECKING
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm._logging import verbose_proxy_logger, verbose_logger
+from litellm.types.utils import SupportedCacheControls
 
 if TYPE_CHECKING:
     from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig
@@ -68,6 +69,15 @@ async def add_litellm_data_to_request(
         cache_dict = parse_cache_control(cache_control_header)
         data["ttl"] = cache_dict.get("s-maxage")
 
+    ### KEY-LEVEL CACHING
+    key_metadata = user_api_key_dict.metadata
+    if "cache" in key_metadata:
+        data["cache"] = {}
+        if isinstance(key_metadata["cache"], dict):
+            for k, v in key_metadata["cache"].items():
+                if k in SupportedCacheControls:
+                    data["cache"][k] = v
+
     verbose_proxy_logger.debug("receiving data: %s", data)
     # users can pass in 'user' param to /chat/completions. Don't override it
     if data.get("user", None) is None and user_api_key_dict.user_id is not None:
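To make the new block's behaviour concrete, here is a standalone restatement of it: cache params stored in the key's metadata are copied onto the request body, but only those whose names appear in `SupportedCacheControls`. The function name and example values below are illustrative, not part of the patch.

```python
# Standalone sketch of the key-level cache merge added above.
# merge_key_cache_params is an illustrative name, not a litellm API.
SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]


def merge_key_cache_params(data: dict, key_metadata: dict) -> dict:
    """Copy supported cache controls from key metadata into the request body."""
    if "cache" in key_metadata:
        data["cache"] = {}
        if isinstance(key_metadata["cache"], dict):
            for k, v in key_metadata["cache"].items():
                if k in SupportedCacheControls:
                    data["cache"][k] = v
    return data


# Example: "no-cache" is kept, an unrecognised control is dropped.
body = {"model": "gpt-3.5-turbo", "messages": []}
print(merge_key_cache_params(body, {"cache": {"no-cache": True, "bogus": 1}}))
# -> {'model': 'gpt-3.5-turbo', 'messages': [], 'cache': {'no-cache': True}}
```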
@@ -7,6 +7,9 @@ class LiteLLMCommonStrings(Enum):
     redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"
 
 
+SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]
+
+
 class CostPerToken(TypedDict):
     input_cost_per_token: float
     output_cost_per_token: float