forked from phoenix/litellm-mirror
fix(litellm_pre_call_utils.py): add support for key level caching params
This commit is contained in:
parent c5a611ca91
commit af1ae80277
4 changed files with 42 additions and 2 deletions
@@ -283,7 +283,7 @@ litellm_settings:
 ### Turn on / off caching per request.

-The proxy support 3 cache-controls:
+The proxy support 4 cache-controls:

 - `ttl`: *Optional(int)* - Will cache the response for the user-defined amount of time (in seconds).
 - `s-maxage`: *Optional(int)* Will only accept cached responses that are within user-defined range (in seconds).

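For orientation only (not part of this commit's diff): the cache-controls above are sent in the request body under a `cache` key, which is what the proxy-side code later in this commit reads. Below is a minimal sketch, assuming the OpenAI Python client pointed at a LiteLLM proxy on `localhost:4000`; the exact `extra_body` shape is an assumption inferred from the docs context around this hunk.

```python
# Illustrative sketch (not part of this commit): per-request cache controls
# passed via the OpenAI client's extra_body. Assumes a LiteLLM proxy running
# on localhost:4000 and a valid proxy key.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "bom dia"}],
    # "ttl" caches this response for 300 seconds; "no-cache" would skip the cache instead.
    extra_body={"cache": {"ttl": 300}},
)
print(chat_completion.choices[0].message.content)
```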
@@ -374,6 +374,33 @@ chat_completion = client.chat.completions.create(
 )
 ```

+### Turn on / off caching per Key.
+
+1. Add cache params when creating a key [full list](#turn-on--off-caching-per-key)
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{
+    "user_id": "222",
+    "metadata": {
+        "cache": {
+            "no-cache": true
+        }
+    }
+}'
+```
+
+2. Test it!
+
+```bash
+curl -X POST 'http://localhost:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer <YOUR_NEW_KEY>' \
+-d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "bom dia"}]}'
+```
+
 ### Deleting Cache Keys - `/cache/delete`
 In order to delete a cache key, send a request to `/cache/delete` with the `keys` you want to delete

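As a side note (not part of the diff): the same "Test it!" request can be made with the OpenAI Python client. The key-level cache settings are applied by the proxy from the key's metadata, so the client passes no cache flags itself; `<YOUR_NEW_KEY>` and the local URL are placeholders from the docs above.

```python
# Illustrative only, not part of this commit: Python equivalent of the curl "Test it!" call.
# The key is assumed to have been generated with metadata.cache = {"no-cache": true},
# so the proxy disables caching for this request without any client-side cache flags.
from openai import OpenAI

client = OpenAI(api_key="<YOUR_NEW_KEY>", base_url="http://localhost:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "bom dia"}],
)
print(response.choices[0].message.content)
```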
@@ -27,7 +27,7 @@ class _PROXY_CacheControlCheck(CustomLogger):
         call_type: str,
     ):
         try:
-            self.print_verbose(f"Inside Cache Control Check Pre-Call Hook")
+            self.print_verbose("Inside Cache Control Check Pre-Call Hook")
             allowed_cache_controls = user_api_key_dict.allowed_cache_controls

             if data.get("cache", None) is None:
@@ -3,6 +3,7 @@ from fastapi import Request
 from typing import Any, Dict, Optional, TYPE_CHECKING
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm._logging import verbose_proxy_logger, verbose_logger
+from litellm.types.utils import SupportedCacheControls

 if TYPE_CHECKING:
     from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig
@@ -68,6 +69,15 @@ async def add_litellm_data_to_request(
         cache_dict = parse_cache_control(cache_control_header)
         data["ttl"] = cache_dict.get("s-maxage")

+    ### KEY-LEVEL CACHING
+    key_metadata = user_api_key_dict.metadata
+    if "cache" in key_metadata:
+        data["cache"] = {}
+        if isinstance(key_metadata["cache"], dict):
+            for k, v in key_metadata["cache"].items():
+                if k in SupportedCacheControls:
+                    data["cache"][k] = v
+
     verbose_proxy_logger.debug("receiving data: %s", data)
     # users can pass in 'user' param to /chat/completions. Don't override it
     if data.get("user", None) is None and user_api_key_dict.user_id is not None:
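To make the added block's behavior concrete, here is a hypothetical, self-contained restatement (not part of this commit): only controls listed in `SupportedCacheControls` are copied from the key's metadata into the request body, and anything else is dropped. The helper name and the standalone list are illustrative; in the commit the logic lives inline in `add_litellm_data_to_request` and the list is imported from `litellm.types.utils`.

```python
# Hypothetical, self-contained restatement of the key-level caching block above;
# not part of this commit. Shows which metadata cache params survive the filter.
SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]

def apply_key_level_cache(data: dict, key_metadata: dict) -> dict:
    """Copy only supported cache controls from key metadata into the request body."""
    if "cache" in key_metadata:
        data["cache"] = {}
        if isinstance(key_metadata["cache"], dict):
            for k, v in key_metadata["cache"].items():
                if k in SupportedCacheControls:
                    data["cache"][k] = v
    return data

# Example: "no-cache" is kept, the unrecognized "foo" is dropped.
print(apply_key_level_cache({}, {"cache": {"no-cache": True, "foo": 1}}))
# -> {'cache': {'no-cache': True}}
```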
@@ -7,6 +7,9 @@ class LiteLLMCommonStrings(Enum):
     redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"


+SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]
+
+
 class CostPerToken(TypedDict):
     input_cost_per_token: float
     output_cost_per_token: float