(refactor) caching use LLMCachingHandler for async_get_cache and set_cache (#6208)

* use folder for caching

* fix importing caching

* fix clickhouse pyright

* fix linting

* fix: correctly pass kwargs and args

* fix test case for embedding

* fix linting

* fix embedding caching logic

* fix: refactor cache handling in utils.py

* fix test_embedding_caching_azure_individual_items_reordered
Ishaan Jaff authored on 2024-10-14 16:34:01 +05:30; committed by GitHub
parent 20e50d7002
commit 4d1b4beb3d
96 changed files with 690 additions and 489 deletions
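The common thread in the documentation hunks below is one import-path change: the flat `litellm.caching` module becomes the `litellm.caching.caching` package path ("use folder for caching"). A minimal before/after sketch, assuming litellm at this commit or later:

```python
import litellm

# Before this commit (flat module):
#   from litellm.caching import Cache, DualCache
# After this commit (caching/ folder with caching.py inside):
from litellm.caching.caching import Cache, DualCache

# Wiring the global cache is unchanged; only the import path moved.
litellm.cache = Cache()  # defaults to the in-memory cache
```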


@@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
 # Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk
-[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching.py)
+[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm.caching.caching.py)
 :::info
@@ -31,7 +31,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="redis", host=<host>, port=<port>, password=<password>)
@@ -68,7 +68,7 @@ AWS_SECRET_ACCESS_KEY = "WOl*****"
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 # pass s3-bucket name
 litellm.cache = Cache(type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2")
@@ -101,7 +101,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 random_number = random.randint(
 1, 100000
@@ -155,7 +155,7 @@ To set up a Qdrant cluster locally follow: https://qdrant.tech/documentation/qui
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 random_number = random.randint(
 1, 100000
@@ -210,7 +210,7 @@ assert response1.id == response2.id
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()
 # Make completion calls
@@ -246,7 +246,7 @@ Then you can use the disk cache as follows.
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="disk")
 # Make completion calls
@@ -422,7 +422,7 @@ def custom_get_cache_key(*args, **kwargs):
 Set your function as litellm.cache.get_cache_key
 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
@@ -434,7 +434,7 @@ litellm.cache = cache # set litellm.cache to your cache
 ## How to write custom add/get cache functions
 ### 1. Init Cache
 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 cache = Cache()
 ```
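The last two hunks above cover the custom cache-key and custom add/get docs. A short sketch of that override under the new import path; the key function body is illustrative, not from the diff:

```python
import litellm
from litellm.caching.caching import Cache

def custom_get_cache_key(*args, **kwargs):
    # Illustrative: key on model + messages so identical requests collide.
    return kwargs.get("model", "") + str(kwargs.get("messages", ""))

cache = Cache()
cache.get_cache_key = custom_get_cache_key  # the documented override point
litellm.cache = cache  # set litellm.cache to your cache, as the docs above do
```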


@@ -6,7 +6,7 @@ Use api.litellm.ai for caching `completion()` and `embedding()` responses
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="hosted") # init cache to use api.litellm.ai
 # Make completion calls
@@ -31,7 +31,7 @@ response2 = completion(
 import time
 import litellm
 from litellm import completion, embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="hosted")
 start_time = time.time()
@@ -53,7 +53,7 @@ LiteLLM can cache your streamed responses for you
 import litellm
 import time
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="hosted")
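The hosted-cache hunks change only the import; usage stays the same. A hedged sketch of the cache-hit timing check those docs walk through (model and prompt are placeholders):

```python
import time
import litellm
from litellm import completion
from litellm.caching.caching import Cache

litellm.cache = Cache(type="hosted")  # api.litellm.ai, per the docs above

msgs = [{"role": "user", "content": "why is LiteLLM amazing?"}]

start = time.time()
completion(model="gpt-3.5-turbo", messages=msgs)
print(f"first call: {time.time() - start:.2f}s")

start = time.time()
completion(model="gpt-3.5-turbo", messages=msgs)
print(f"second call: {time.time() - start:.2f}s")  # much faster on a cache hit
```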


@@ -13,7 +13,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()
 # Make completion calls
@@ -35,7 +35,7 @@ response2 = completion(
 Add custom key-value pairs to your cache.
 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 cache = Cache()
 cache.add_cache(cache_key="test-key", result="1234")
@@ -50,7 +50,7 @@ LiteLLM can cache your streamed responses for you
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()
 # Make completion calls
@@ -77,7 +77,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
 import time
 import litellm
 from litellm import embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()
 start_time = time.time()
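One hunk above shows `cache.add_cache(cache_key="test-key", result="1234")`. A minimal round-trip sketch; the matching `get_cache(cache_key=...)` call follows litellm's caching docs and is an assumption, not part of this diff:

```python
from litellm.caching.caching import Cache

cache = Cache()  # in-memory by default

# Store an arbitrary value under an explicit key...
cache.add_cache(cache_key="test-key", result="1234")

# ...and read it back. Kwarg name mirrors add_cache (assumed, see lead-in).
print(cache.get_cache(cache_key="test-key"))  # -> "1234"
```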


@@ -49,13 +49,13 @@ litellm_settings:
 cache: true
 cache_params: # set cache params for redis
 type: redis
-namespace: "litellm_caching"
+namespace: "litellm.caching.caching"
 ```
 and keys will be stored like:
 ```
-litellm_caching:<hash>
+litellm.caching.caching:<hash>
 ```
 #### Redis Cluster
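The namespace rename also applies when the cache is configured from Python rather than proxy YAML. A sketch; the `namespace` kwarg comes from litellm's Redis caching docs, not from this diff:

```python
import os
import litellm
from litellm.caching.caching import Cache

# Keys are written as "<namespace>:<hash>", mirroring the proxy behavior above.
litellm.cache = Cache(
    type="redis",
    host=os.environ["REDIS_HOST"],
    port=os.environ["REDIS_PORT"],
    password=os.environ["REDIS_PASSWORD"],
    namespace="litellm.caching.caching",  # assumed kwarg, see lead-in
)
```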


@@ -645,7 +645,7 @@ litellm_settings:
 host: "localhost" # The host address for the Redis cache. Required if type is "redis".
 port: 6379 # The port number for the Redis cache. Required if type is "redis".
 password: "your_password" # The password for the Redis cache. Required if type is "redis".
-namespace: "litellm_caching" # namespace for redis cache
+namespace: "litellm.caching.caching" # namespace for redis cache
 # Optional - Redis Cluster Settings
 redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
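The same `cache_params` block can carry Redis Cluster settings. A hedged Python equivalent; passing `redis_startup_nodes` to `Cache` follows litellm's Redis Cluster docs and is not shown in this diff:

```python
import litellm
from litellm.caching.caching import Cache

# Cluster mode: startup nodes replace a single host/port pair.
litellm.cache = Cache(
    type="redis",
    redis_startup_nodes=[{"host": "127.0.0.1", "port": "7001"}],  # assumed kwarg
)
```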


@@ -25,7 +25,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
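The final hunk is a source change (a guardrail module) rather than docs: `DualCache` now imports from the same `litellm.caching.caching` path. A minimal sketch of what `DualCache` offers; the `set_cache`/`get_cache` signatures are assumptions drawn from litellm's source of this era:

```python
from litellm.caching.caching import DualCache

# DualCache layers an in-memory cache over an optional Redis cache;
# with no arguments it runs in-memory only.
dual_cache = DualCache()

dual_cache.set_cache(key="user-123:request-count", value=1)
print(dual_cache.get_cache(key="user-123:request-count"))  # -> 1
```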