forked from phoenix/litellm-mirror
(refactor) caching use LLMCachingHandler for async_get_cache and set_cache (#6208)
* use folder for caching * fix importing caching * fix clickhouse pyright * fix linting * fix correctly pass kwargs and args * fix test case for embedding * fix linting * fix embedding caching logic * fix refactor handle utils.py * fix test_embedding_caching_azure_individual_items_reordered
This commit is contained in:
parent
20e50d7002
commit
4d1b4beb3d
96 changed files with 690 additions and 489 deletions
|
@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
|
|||
|
||||
# Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk
|
||||
|
||||
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching.py)
|
||||
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching/caching.py)
|
||||
|
||||
:::info
|
||||
|
||||
|
@ -31,7 +31,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
|
|||
```python
|
||||
import litellm
|
||||
from litellm import completion
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
litellm.cache = Cache(type="redis", host=<host>, port=<port>, password=<password>)
|
||||
|
||||
|
@ -68,7 +68,7 @@ AWS_SECRET_ACCESS_KEY = "WOl*****"
|
|||
```python
|
||||
import litellm
|
||||
from litellm import completion
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
# pass s3-bucket name
|
||||
litellm.cache = Cache(type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2")
|
||||
|
@ -101,7 +101,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
|
|||
```python
|
||||
import litellm
|
||||
from litellm import completion
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
random_number = random.randint(
|
||||
1, 100000
|
||||
|
@ -155,7 +155,7 @@ To set up a Qdrant cluster locally follow: https://qdrant.tech/documentation/qui
|
|||
```python
|
||||
import litellm
|
||||
from litellm import completion
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
random_number = random.randint(
|
||||
1, 100000
|
||||
|
@ -210,7 +210,7 @@ assert response1.id == response2.id
|
|||
```python
|
||||
import litellm
|
||||
from litellm import completion
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
litellm.cache = Cache()
|
||||
|
||||
# Make completion calls
|
||||
|
@ -246,7 +246,7 @@ Then you can use the disk cache as follows.
|
|||
```python
|
||||
import litellm
|
||||
from litellm import completion
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
litellm.cache = Cache(type="disk")
|
||||
|
||||
# Make completion calls
|
||||
|
@ -422,7 +422,7 @@ def custom_get_cache_key(*args, **kwargs):
|
|||
|
||||
Set your function as litellm.cache.get_cache_key
|
||||
```python
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
|
||||
|
||||
|
@ -434,7 +434,7 @@ litellm.cache = cache # set litellm.cache to your cache
|
|||
## How to write custom add/get cache functions
|
||||
### 1. Init Cache
|
||||
```python
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
cache = Cache()
|
||||
```
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ Use api.litellm.ai for caching `completion()` and `embedding()` responses
|
|||
```python
|
||||
import litellm
|
||||
from litellm import completion
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
litellm.cache = Cache(type="hosted") # init cache to use api.litellm.ai
|
||||
|
||||
# Make completion calls
|
||||
|
@ -31,7 +31,7 @@ response2 = completion(
|
|||
import time
|
||||
import litellm
|
||||
from litellm import completion, embedding
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
litellm.cache = Cache(type="hosted")
|
||||
|
||||
start_time = time.time()
|
||||
|
@ -53,7 +53,7 @@ LiteLLM can cache your streamed responses for you
|
|||
import litellm
|
||||
import time
|
||||
from litellm import completion
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
litellm.cache = Cache(type="hosted")
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
|
|||
```python
|
||||
import litellm
|
||||
from litellm import completion
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
litellm.cache = Cache()
|
||||
|
||||
# Make completion calls
|
||||
|
@ -35,7 +35,7 @@ response2 = completion(
|
|||
Add custom key-value pairs to your cache.
|
||||
|
||||
```python
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
cache = Cache()
|
||||
|
||||
cache.add_cache(cache_key="test-key", result="1234")
|
||||
|
@ -50,7 +50,7 @@ LiteLLM can cache your streamed responses for you
|
|||
```python
|
||||
import litellm
|
||||
from litellm import completion
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
litellm.cache = Cache()
|
||||
|
||||
# Make completion calls
|
||||
|
@ -77,7 +77,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
|
|||
import time
|
||||
import litellm
|
||||
from litellm import embedding
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
litellm.cache = Cache()
|
||||
|
||||
start_time = time.time()
|
||||
|
|
|
@ -49,13 +49,13 @@ litellm_settings:
|
|||
cache: true
|
||||
cache_params: # set cache params for redis
|
||||
type: redis
|
||||
namespace: "litellm_caching"
|
||||
namespace: "litellm_caching"
|
||||
```
|
||||
|
||||
and keys will be stored like:
|
||||
|
||||
```
|
||||
litellm_caching:<hash>
|
||||
litellm_caching:<hash>
|
||||
```
|
||||
|
||||
#### Redis Cluster
|
||||
|
|
|
@ -645,7 +645,7 @@ litellm_settings:
|
|||
host: "localhost" # The host address for the Redis cache. Required if type is "redis".
|
||||
port: 6379 # The port number for the Redis cache. Required if type is "redis".
|
||||
password: "your_password" # The password for the Redis cache. Required if type is "redis".
|
||||
namespace: "litellm_caching" # namespace for redis cache
|
||||
namespace: "litellm_caching" # namespace for redis cache
|
||||
|
||||
# Optional - Redis Cluster Settings
|
||||
redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
|
||||
|
|
|
@ -25,7 +25,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
|
|||
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.integrations.custom_guardrail import CustomGuardrail
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue