(refactor) caching use LLMCachingHandler for async_get_cache and set_cache (#6208)

* use folder for caching

* fix importing caching

* fix clickhouse pyright

* fix linting

* fix: correctly pass kwargs and args

* fix test case for embedding

* fix linting

* fix embedding caching logic

* fix: refactor cache handling in utils.py

* fix test_embedding_caching_azure_individual_items_reordered
Ishaan Jaff authored on 2024-10-14 16:34:01 +05:30; committed by GitHub
parent 20e50d7002
commit 4d1b4beb3d
96 changed files with 690 additions and 489 deletions
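The common thread in the documentation hunks below is one import-path change: the flat `litellm.caching` module becomes the `litellm.caching.caching` package path ("use folder for caching"). A minimal before/after sketch, assuming litellm at this commit or later:

```python
import litellm

# Before this commit (flat module):
#   from litellm.caching import Cache, DualCache
# After this commit (caching/ folder with caching.py inside):
from litellm.caching.caching import Cache, DualCache

# Wiring the global cache is unchanged; only the import path moved.
litellm.cache = Cache()  # defaults to the in-memory cache
```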


@@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
 # Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk
-[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching.py)
+[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm.caching.caching.py)
 :::info
@@ -31,7 +31,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="redis", host=<host>, port=<port>, password=<password>)
@@ -68,7 +68,7 @@ AWS_SECRET_ACCESS_KEY = "WOl*****"
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 # pass s3-bucket name
 litellm.cache = Cache(type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2")
@@ -101,7 +101,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 random_number = random.randint(
 1, 100000
@@ -155,7 +155,7 @@ To set up a Qdrant cluster locally follow: https://qdrant.tech/documentation/qui
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 random_number = random.randint(
 1, 100000
@@ -210,7 +210,7 @@ assert response1.id == response2.id
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()
 # Make completion calls
@@ -246,7 +246,7 @@ Then you can use the disk cache as follows.
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="disk")
 # Make completion calls
@@ -422,7 +422,7 @@ def custom_get_cache_key(*args, **kwargs):
 Set your function as litellm.cache.get_cache_key
 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
@@ -434,7 +434,7 @@ litellm.cache = cache # set litellm.cache to your cache
 ## How to write custom add/get cache functions
 ### 1. Init Cache
 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 cache = Cache()
 ```
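The last two hunks above cover the custom cache-key and custom add/get docs. A short sketch of that override under the new import path; the key function body is illustrative, not from the diff:

```python
import litellm
from litellm.caching.caching import Cache

def custom_get_cache_key(*args, **kwargs):
    # Illustrative: key on model + messages so identical requests collide.
    return kwargs.get("model", "") + str(kwargs.get("messages", ""))

cache = Cache()
cache.get_cache_key = custom_get_cache_key  # the documented override point
litellm.cache = cache  # set litellm.cache to your cache, as the docs above do
```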


@@ -6,7 +6,7 @@ Use api.litellm.ai for caching `completion()` and `embedding()` responses
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="hosted") # init cache to use api.litellm.ai
 # Make completion calls
@@ -31,7 +31,7 @@ response2 = completion(
 import time
 import litellm
 from litellm import completion, embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="hosted")
 start_time = time.time()
@@ -53,7 +53,7 @@ LiteLLM can cache your streamed responses for you
 import litellm
 import time
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="hosted")
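The hosted-cache hunks change only the import; usage stays the same. A hedged sketch of the cache-hit timing check those docs walk through (model and prompt are placeholders):

```python
import time
import litellm
from litellm import completion
from litellm.caching.caching import Cache

litellm.cache = Cache(type="hosted")  # api.litellm.ai, per the docs above

msgs = [{"role": "user", "content": "why is LiteLLM amazing?"}]

start = time.time()
completion(model="gpt-3.5-turbo", messages=msgs)
print(f"first call: {time.time() - start:.2f}s")

start = time.time()
completion(model="gpt-3.5-turbo", messages=msgs)
print(f"second call: {time.time() - start:.2f}s")  # much faster on a cache hit
```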


@@ -13,7 +13,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()
 # Make completion calls
@@ -35,7 +35,7 @@ response2 = completion(
 Add custom key-value pairs to your cache.
 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 cache = Cache()
 cache.add_cache(cache_key="test-key", result="1234")
@@ -50,7 +50,7 @@ LiteLLM can cache your streamed responses for you
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()
 # Make completion calls
@@ -77,7 +77,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
 import time
 import litellm
 from litellm import embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()
 start_time = time.time()
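One hunk above shows `cache.add_cache(cache_key="test-key", result="1234")`. A minimal round-trip sketch; the matching `get_cache(cache_key=...)` call follows litellm's caching docs and is an assumption, not part of this diff:

```python
from litellm.caching.caching import Cache

cache = Cache()  # in-memory by default

# Store an arbitrary value under an explicit key...
cache.add_cache(cache_key="test-key", result="1234")

# ...and read it back. Kwarg name mirrors add_cache (assumed, see lead-in).
print(cache.get_cache(cache_key="test-key"))  # -> "1234"
```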


@@ -49,13 +49,13 @@ litellm_settings:
 cache: true
 cache_params: # set cache params for redis
 type: redis
-namespace: "litellm_caching"
+namespace: "litellm.caching.caching"
 ```
 and keys will be stored like:
 ```
-litellm_caching:<hash>
+litellm.caching.caching:<hash>
 ```
 #### Redis Cluster
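The namespace rename also applies when the cache is configured from Python rather than proxy YAML. A sketch; the `namespace` kwarg comes from litellm's Redis caching docs, not from this diff:

```python
import os
import litellm
from litellm.caching.caching import Cache

# Keys are written as "<namespace>:<hash>", mirroring the proxy behavior above.
litellm.cache = Cache(
    type="redis",
    host=os.environ["REDIS_HOST"],
    port=os.environ["REDIS_PORT"],
    password=os.environ["REDIS_PASSWORD"],
    namespace="litellm.caching.caching",  # assumed kwarg, see lead-in
)
```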


@@ -645,7 +645,7 @@ litellm_settings:
 host: "localhost" # The host address for the Redis cache. Required if type is "redis".
 port: 6379 # The port number for the Redis cache. Required if type is "redis".
 password: "your_password" # The password for the Redis cache. Required if type is "redis".
-namespace: "litellm_caching" # namespace for redis cache
+namespace: "litellm.caching.caching" # namespace for redis cache
 # Optional - Redis Cluster Settings
 redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
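The same `cache_params` block can carry Redis Cluster settings. A hedged Python equivalent; passing `redis_startup_nodes` to `Cache` follows litellm's Redis Cluster docs and is not shown in this diff:

```python
import litellm
from litellm.caching.caching import Cache

# Cluster mode: startup nodes replace a single host/port pair.
litellm.cache = Cache(
    type="redis",
    redis_startup_nodes=[{"host": "127.0.0.1", "port": "7001"}],  # assumed kwarg
)
```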


@@ -25,7 +25,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
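The final hunk is a source change (a guardrail module) rather than docs: `DualCache` now imports from the same `litellm.caching.caching` path. A minimal sketch of what `DualCache` offers; the `set_cache`/`get_cache` signatures are assumptions drawn from litellm's source of this era:

```python
from litellm.caching.caching import DualCache

# DualCache layers an in-memory cache over an optional Redis cache;
# with no arguments it runs in-memory only.
dual_cache = DualCache()

dual_cache.set_cache(key="user-123:request-count", value=1)
print(dual_cache.get_cache(key="user-123:request-count"))  # -> 1
```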