Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 03:34:10 +00:00)

fix set Caching Default Off

Parent: feb354d3bc
Commit: 74f0e60962

3 changed files with 79 additions and 19 deletions
@@ -35,7 +35,7 @@ litellm_settings:
 
 #### [OPTIONAL] Step 1.5: Add redis namespaces, default ttl
 
-## Namespace
+#### Namespace
 If you want to create some folder for your keys, you can set a namespace, like this:
 
 ```yaml
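For reference, a minimal sketch of what a namespaced cache config can look like. The `type` and `namespace` keys under `cache_params` are an assumption here (the full YAML from the doc is not shown in this hunk); the namespace value is chosen to match the `litellm_caching:<hash>` key pattern visible as context in the following hunk:

```yaml
litellm_settings:
  cache: True
  cache_params:
    type: redis
    namespace: "litellm_caching"  # assumed key; cache entries would then be stored as litellm_caching:<hash>
```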
@@ -52,7 +52,7 @@ and keys will be stored like:
 litellm_caching:<hash>
 ```
 
-## Redis Cluster
+#### Redis Cluster
 
 ```yaml
 model_list:
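A minimal sketch of the Redis Cluster cache config this section introduces, assuming `redis_startup_nodes` sits under `cache_params` (the key appears as context in the next hunk); the node address is illustrative:

```yaml
litellm_settings:
  cache: True
  cache_params:
    type: redis
    redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]  # list each cluster node here
```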
@@ -68,7 +68,7 @@ litellm_settings:
     redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
 ```
 
-## TTL
+#### TTL
 
 ```yaml
 litellm_settings:
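A minimal sketch of a default TTL config, assuming `ttl` is a `cache_params` key that takes a value in seconds (the full YAML from the doc is not shown in this hunk):

```yaml
litellm_settings:
  cache: True
  cache_params:
    type: redis
    ttl: 600  # assumed: cached responses expire after 600 seconds
```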
@@ -81,7 +81,7 @@ litellm_settings:
 ```
 
 
-## SSL
+#### SSL
 
 just set `REDIS_SSL="True"` in your .env, and LiteLLM will pick this up.
 
@@ -397,7 +397,7 @@ litellm_settings:
     # /chat/completions, /completions, /embeddings, /audio/transcriptions
 ```
 
-### Turn on / off caching per request.
+### **Turn on / off caching per request.**
 
 The proxy supports 4 cache-controls:
 
@@ -699,6 +699,73 @@ x-litellm-cache-key: 586bf3f3c1bf5aecb55bd9996494d3bbc69eb58397163add6d49537762a
 
 ```
 
+### **Set Caching Default Off - Opt in only**
+
+1. **Set `mode: default_off` for caching**
+
+```yaml
+model_list:
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+
+# default off mode
+litellm_settings:
+  set_verbose: True
+  cache: True
+  cache_params:
+    mode: default_off # 👈 Key change: cache is default_off
+```
+
+2. **Opting in to cache when cache is default off**
+
+<Tabs>
+<TabItem value="openai" label="OpenAI Python SDK">
+
+```python
+import os
+from openai import OpenAI
+
+client = OpenAI(api_key=<litellm-api-key>, base_url="http://0.0.0.0:4000")
+
+chat_completion = client.chat.completions.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "Say this is a test",
+        }
+    ],
+    model="gpt-3.5-turbo",
+    extra_body={  # OpenAI python accepts extra args in extra_body
+        "cache": {"use-cache": True}
+    },
+)
+```
+</TabItem>
+
+<TabItem value="curl" label="curl">
+
+```shell
+curl http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "gpt-3.5-turbo",
+    "cache": {"use-cache": true},
+    "messages": [
+      {"role": "user", "content": "Say this is a test"}
+    ]
+  }'
+```
+
+</TabItem>
+
+</Tabs>
+
+
 ### Turn on `batch_redis_requests`
 
@@ -5,16 +5,9 @@ model_list:
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
 
-guardrails:
-  - guardrail_name: "custom-pre-guard"
-    litellm_params:
-      guardrail: custom_guardrail.myCustomGuardrail
-      mode: "pre_call"
-  - guardrail_name: "custom-during-guard"
-    litellm_params:
-      guardrail: custom_guardrail.myCustomGuardrail
-      mode: "during_call"
-  - guardrail_name: "custom-post-guard"
-    litellm_params:
-      guardrail: custom_guardrail.myCustomGuardrail
-      mode: "post_call"
+# default off mode
+litellm_settings:
+  set_verbose: True
+  cache: True
+  cache_params:
+    mode: default_off
@@ -1604,7 +1604,7 @@ class ProxyConfig:
                 self._init_cache(cache_params=cache_params)
                 if litellm.cache is not None:
                     verbose_proxy_logger.debug(  # noqa
-                        f"{blue_color_code}Set Cache on LiteLLM Proxy: {vars(litellm.cache.cache)}{reset_color_code}"
+                        f"{blue_color_code}Set Cache on LiteLLM Proxy= {vars(litellm.cache.cache)}{vars(litellm.cache)}{reset_color_code}"
                     )
             elif key == "cache" and value is False:
                 pass