(refactor) caching use LLMCachingHandler for async_get_cache and set_cache (#6208)

* use folder for caching

* fix importing caching

* fix clickhouse pyright

* fix linting

* fix correctly pass kwargs and args

* fix test case for embedding

* fix linting

* fix embedding caching logic

* fix refactor handle utils.py

* fix test_embedding_caching_azure_individual_items_reordered
Ishaan Jaff authored on 2024-10-14 16:34:01 +05:30, committed by GitHub
parent 20e50d7002
commit 4d1b4beb3d
96 changed files with 690 additions and 489 deletions

View file

@ -10,7 +10,7 @@ sys.path.insert(
import asyncio
from litellm import Router, Timeout
import time
from litellm.caching import Cache
from litellm.caching.caching import Cache
import litellm
import openai

View file

@ -10,7 +10,7 @@ sys.path.insert(
import asyncio
from litellm import Router, Timeout
import time
from litellm.caching import Cache
from litellm.caching.caching import Cache
import litellm
litellm.cache = Cache(

View file

@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
# Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching.py)
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm.caching.caching.py)
:::info
@ -31,7 +31,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
```python
import litellm
from litellm import completion
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache(type="redis", host=<host>, port=<port>, password=<password>)
@ -68,7 +68,7 @@ AWS_SECRET_ACCESS_KEY = "WOl*****"
```python
import litellm
from litellm import completion
from litellm.caching import Cache
from litellm.caching.caching import Cache
# pass s3-bucket name
litellm.cache = Cache(type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2")
@ -101,7 +101,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
```python
import litellm
from litellm import completion
from litellm.caching import Cache
from litellm.caching.caching import Cache
random_number = random.randint(
1, 100000
@ -155,7 +155,7 @@ To set up a Qdrant cluster locally follow: https://qdrant.tech/documentation/qui
```python
import litellm
from litellm import completion
from litellm.caching import Cache
from litellm.caching.caching import Cache
random_number = random.randint(
1, 100000
@ -210,7 +210,7 @@ assert response1.id == response2.id
```python
import litellm
from litellm import completion
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache()
# Make completion calls
@ -246,7 +246,7 @@ Then you can use the disk cache as follows.
```python
import litellm
from litellm import completion
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache(type="disk")
# Make completion calls
@ -422,7 +422,7 @@ def custom_get_cache_key(*args, **kwargs):
Set your function as litellm.cache.get_cache_key
```python
from litellm.caching import Cache
from litellm.caching.caching import Cache
cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
@ -434,7 +434,7 @@ litellm.cache = cache # set litellm.cache to your cache
## How to write custom add/get cache functions
### 1. Init Cache
```python
from litellm.caching import Cache
from litellm.caching.caching import Cache
cache = Cache()
```
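
Every documentation hunk above makes the same mechanical change: caching now lives in a folder, so `Cache` (and, in later hunks, `DualCache` and the other cache classes) is imported from `litellm.caching.caching` instead of `litellm.caching`. A minimal before/after sketch, assuming the default in-memory cache shown in the docs above:

```python
import litellm

# old import path (pre-refactor):
#   from litellm.caching import Cache
# new import path, with caching moved into the litellm/caching/ folder:
from litellm.caching.caching import Cache

litellm.cache = Cache()  # in-memory default; redis / s3 / disk variants keep the same kwargs
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)
```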

View file

@ -6,7 +6,7 @@ Use api.litellm.ai for caching `completion()` and `embedding()` responses
```python
import litellm
from litellm import completion
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache(type="hosted") # init cache to use api.litellm.ai
# Make completion calls
@ -31,7 +31,7 @@ response2 = completion(
import time
import litellm
from litellm import completion, embedding
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache(type="hosted")
start_time = time.time()
@ -53,7 +53,7 @@ LiteLLM can cache your streamed responses for you
import litellm
import time
from litellm import completion
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache(type="hosted")

View file

@ -13,7 +13,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
```python
import litellm
from litellm import completion
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache()
# Make completion calls
@ -35,7 +35,7 @@ response2 = completion(
Add custom key-value pairs to your cache.
```python
from litellm.caching import Cache
from litellm.caching.caching import Cache
cache = Cache()
cache.add_cache(cache_key="test-key", result="1234")
@ -50,7 +50,7 @@ LiteLLM can cache your streamed responses for you
```python
import litellm
from litellm import completion
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache()
# Make completion calls
@ -77,7 +77,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
import time
import litellm
from litellm import embedding
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache()
start_time = time.time()

View file

@ -49,13 +49,13 @@ litellm_settings:
cache: true
cache_params: # set cache params for redis
type: redis
namespace: "litellm_caching"
namespace: "litellm.caching.caching"
```
and keys will be stored like:
```
litellm_caching:<hash>
litellm.caching.caching:<hash>
```
#### Redis Cluster

View file

@ -645,7 +645,7 @@ litellm_settings:
host: "localhost" # The host address for the Redis cache. Required if type is "redis".
port: 6379 # The port number for the Redis cache. Required if type is "redis".
password: "your_password" # The password for the Redis cache. Required if type is "redis".
namespace: "litellm_caching" # namespace for redis cache
namespace: "litellm.caching.caching" # namespace for redis cache
# Optional - Redis Cluster Settings
redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]

View file

@ -25,7 +25,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata

View file

@ -6,7 +6,7 @@ import dotenv, os
import requests
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from typing import Literal, Union, Optional

View file

@ -13,7 +13,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union, Any
import litellm, traceback, sys, uuid
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_guardrail import CustomGuardrail
from fastapi import HTTPException

View file

@ -9,7 +9,7 @@
from typing import Optional, Literal
import litellm
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger

View file

@ -10,7 +10,7 @@
from typing import Optional, Literal
import litellm
from litellm.proxy.utils import PrismaClient
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger

View file

@ -9,7 +9,7 @@
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException

View file

@ -15,7 +15,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException

View file

@ -13,7 +13,7 @@ import traceback
import sys
import uuid
import os
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException

View file

@ -12,7 +12,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException

View file

@ -12,7 +12,7 @@ sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm._logging import verbose_proxy_logger
import tempfile

View file

@ -7,7 +7,7 @@ import threading
import os
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.caching import Cache
from litellm.caching.caching import Cache
from litellm._logging import (
set_verbose,
_turn_on_debug,

View file

@ -2,7 +2,6 @@ from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait
from typing import List, Optional
import litellm
from litellm import completion
from litellm._logging import print_verbose
from litellm.utils import get_optional_params
@ -108,7 +107,7 @@ def batch_completion(
if "kwargs" in kwargs_modified:
original_kwargs = kwargs_modified.pop("kwargs")
future = executor.submit(
completion, **kwargs_modified, **original_kwargs
litellm.completion, **kwargs_modified, **original_kwargs
)
completions.append(future)
@ -156,7 +155,7 @@ def batch_completion_models(*args, **kwargs):
with ThreadPoolExecutor(max_workers=len(models)) as executor:
for model in models:
futures[model] = executor.submit(
completion, *args, model=model, **kwargs
litellm.completion, *args, model=model, **kwargs
)
for model, future in sorted(
@ -178,7 +177,9 @@ def batch_completion_models(*args, **kwargs):
): # don't override deployment values e.g. model name, api base, etc.
deployment[key] = kwargs[key]
kwargs = {**deployment, **nested_kwargs}
futures[deployment["model"]] = executor.submit(completion, **kwargs)
futures[deployment["model"]] = executor.submit(
litellm.completion, **kwargs
)
while futures:
# wait for the first returned future
@ -246,7 +247,7 @@ def batch_completion_models_all_responses(*args, **kwargs):
with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
for idx, model in enumerate(models):
future = executor.submit(completion, *args, model=model, **kwargs)
future = executor.submit(litellm.completion, *args, model=model, **kwargs)
if future.result() is not None:
responses.append(future.result())
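
The hunk header `-2,7 +2,6` above shows `from litellm import completion` being dropped from the imports, so the batch helpers now submit the module-level `litellm.completion` to the executor instead; behaviour is unchanged, the calls simply resolve through the `litellm` module. A small usage sketch of the same submit pattern, with hypothetical model names and the usual API keys assumed to be configured:

```python
from concurrent.futures import ThreadPoolExecutor

import litellm

models = ["gpt-3.5-turbo", "claude-3-haiku-20240307"]  # hypothetical model list
messages = [{"role": "user", "content": "Write a haiku about caching."}]

responses = []
with ThreadPoolExecutor(max_workers=len(models)) as executor:
    # submit the module-level litellm.completion, as the refactored helpers now do
    futures = [executor.submit(litellm.completion, model=m, messages=messages) for m in models]
    for future in futures:
        if future.result() is not None:
            responses.append(future.result())
```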

View file

@ -212,7 +212,7 @@ class RedisCache(BaseCache):
from litellm._service_logger import ServiceLogging
from ._redis import get_redis_client, get_redis_connection_pool
from .._redis import get_redis_client, get_redis_connection_pool
redis_kwargs = {}
if host is not None:
@ -276,7 +276,7 @@ class RedisCache(BaseCache):
)
def init_async_client(self):
from ._redis import get_redis_async_client
from .._redis import get_redis_async_client
return get_redis_async_client(
connection_pool=self.async_redis_conn_pool, **self.redis_kwargs
@ -302,7 +302,7 @@ class RedisCache(BaseCache):
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
print_verbose(
f"LiteLLM Caching: set() - Got exception from REDIS : {str(e)}"
f"litellm.caching.caching: set() - Got exception from REDIS : {str(e)}"
)
def increment_cache(
@ -705,7 +705,7 @@ class RedisCache(BaseCache):
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
verbose_logger.error(
"LiteLLM Caching: get() - Got exception from REDIS: ", e
"litellm.caching.caching: get() - Got exception from REDIS: ", e
)
def batch_get_cache(self, key_list) -> dict:
@ -781,7 +781,7 @@ class RedisCache(BaseCache):
)
# NON blocking - notify users Redis is throwing an exception
print_verbose(
f"LiteLLM Caching: async get() - Got exception from REDIS: {str(e)}"
f"litellm.caching.caching: async get() - Got exception from REDIS: {str(e)}"
)
async def async_batch_get_cache(self, key_list) -> dict:

View file

@ -0,0 +1,440 @@
"""
This contains LLMCachingHandler
This exposes two methods:
- async_get_cache
- async_set_cache
This file is a wrapper around caching.py
In each method it will call the appropriate method from caching.py
"""
import asyncio
import datetime
import threading
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
from pydantic import BaseModel
import litellm
from litellm._logging import print_verbose
from litellm.caching.caching import (
Cache,
QdrantSemanticCache,
RedisCache,
RedisSemanticCache,
S3Cache,
)
from litellm.types.rerank import RerankResponse
from litellm.types.utils import (
CallTypes,
Embedding,
EmbeddingResponse,
ModelResponse,
TextCompletionResponse,
TranscriptionResponse,
)
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
else:
LiteLLMLoggingObj = Any
class CachingHandlerResponse(BaseModel):
"""
This is the response object for the caching handler. We need to separate embedding cached responses and (completion / text_completion / transcription) cached responses
For embeddings there can be a cache hit for some of the inputs in the list and a cache miss for others
"""
cached_result: Optional[Any] = None
final_embedding_cached_response: Optional[EmbeddingResponse] = None
embedding_all_elements_cache_hit: bool = (
False # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
)
class LLMCachingHandler:
def __init__(self):
pass
async def _async_get_cache(
self,
model: str,
original_function: Callable,
logging_obj: LiteLLMLoggingObj,
start_time: datetime.datetime,
call_type: str,
kwargs: Dict[str, Any],
args: Optional[Tuple[Any, ...]] = None,
) -> CachingHandlerResponse:
"""
Internal method to get from the cache.
Handles different call types (embeddings, chat/completions, text_completion, transcription)
and accordingly returns the cached response
Args:
model: str:
original_function: Callable:
logging_obj: LiteLLMLoggingObj:
start_time: datetime.datetime:
call_type: str:
kwargs: Dict[str, Any]:
args: Optional[Tuple[Any, ...]] = None:
Returns:
CachingHandlerResponse:
Raises:
None
"""
from litellm.utils import (
CustomStreamWrapper,
convert_to_model_response_object,
convert_to_streaming_response_async,
)
args = args or ()
final_embedding_cached_response: Optional[EmbeddingResponse] = None
cached_result: Optional[Any] = None
if (
(kwargs.get("caching", None) is None and litellm.cache is not None)
or kwargs.get("caching", False) is True
) and (
kwargs.get("cache", {}).get("no-cache", False) is not True
): # allow users to control returning cached responses from the completion function
# checking cache
print_verbose("INSIDE CHECKING CACHE")
if (
litellm.cache is not None
and litellm.cache.supported_call_types is not None
and str(original_function.__name__)
in litellm.cache.supported_call_types
):
print_verbose("Checking Cache")
if call_type == CallTypes.aembedding.value and isinstance(
kwargs["input"], list
):
tasks = []
for idx, i in enumerate(kwargs["input"]):
preset_cache_key = litellm.cache.get_cache_key(
*args, **{**kwargs, "input": i}
)
tasks.append(
litellm.cache.async_get_cache(cache_key=preset_cache_key)
)
cached_result = await asyncio.gather(*tasks)
## check if cached result is None ##
if cached_result is not None and isinstance(cached_result, list):
# set cached_result to None if all elements are None
if all(result is None for result in cached_result):
cached_result = None
elif isinstance(litellm.cache.cache, RedisSemanticCache) or isinstance(
litellm.cache.cache, RedisCache
):
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
)
cached_result = await litellm.cache.async_get_cache(*args, **kwargs)
elif isinstance(litellm.cache.cache, QdrantSemanticCache):
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
)
cached_result = await litellm.cache.async_get_cache(*args, **kwargs)
else: # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
)
cached_result = litellm.cache.get_cache(*args, **kwargs)
if cached_result is not None and not isinstance(cached_result, list):
print_verbose("Cache Hit!")
cache_hit = True
end_time = datetime.datetime.now()
(
model,
custom_llm_provider,
dynamic_api_key,
api_base,
) = litellm.get_llm_provider(
model=model,
custom_llm_provider=kwargs.get("custom_llm_provider", None),
api_base=kwargs.get("api_base", None),
api_key=kwargs.get("api_key", None),
)
print_verbose(
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
)
logging_obj.update_environment_variables(
model=model,
user=kwargs.get("user", None),
optional_params={},
litellm_params={
"logger_fn": kwargs.get("logger_fn", None),
"acompletion": True,
"metadata": kwargs.get("metadata", {}),
"model_info": kwargs.get("model_info", {}),
"proxy_server_request": kwargs.get(
"proxy_server_request", None
),
"preset_cache_key": kwargs.get("preset_cache_key", None),
"stream_response": kwargs.get("stream_response", {}),
"api_base": kwargs.get("api_base", ""),
},
input=kwargs.get("messages", ""),
api_key=kwargs.get("api_key", None),
original_response=str(cached_result),
additional_args=None,
stream=kwargs.get("stream", False),
)
call_type = original_function.__name__
if call_type == CallTypes.acompletion.value and isinstance(
cached_result, dict
):
if kwargs.get("stream", False) is True:
cached_result = convert_to_streaming_response_async(
response_object=cached_result,
)
cached_result = CustomStreamWrapper(
completion_stream=cached_result,
model=model,
custom_llm_provider="cached_response",
logging_obj=logging_obj,
)
else:
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=ModelResponse(),
)
if call_type == CallTypes.atext_completion.value and isinstance(
cached_result, dict
):
if kwargs.get("stream", False) is True:
cached_result = convert_to_streaming_response_async(
response_object=cached_result,
)
cached_result = CustomStreamWrapper(
completion_stream=cached_result,
model=model,
custom_llm_provider="cached_response",
logging_obj=logging_obj,
)
else:
cached_result = TextCompletionResponse(**cached_result)
elif call_type == CallTypes.aembedding.value and isinstance(
cached_result, dict
):
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=EmbeddingResponse(),
response_type="embedding",
)
elif call_type == CallTypes.arerank.value and isinstance(
cached_result, dict
):
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=None,
response_type="rerank",
)
elif call_type == CallTypes.atranscription.value and isinstance(
cached_result, dict
):
hidden_params = {
"model": "whisper-1",
"custom_llm_provider": custom_llm_provider,
"cache_hit": True,
}
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=TranscriptionResponse(),
response_type="audio_transcription",
hidden_params=hidden_params,
)
if kwargs.get("stream", False) is False:
# LOG SUCCESS
asyncio.create_task(
logging_obj.async_success_handler(
cached_result, start_time, end_time, cache_hit
)
)
threading.Thread(
target=logging_obj.success_handler,
args=(cached_result, start_time, end_time, cache_hit),
).start()
cache_key = kwargs.get("preset_cache_key", None)
if (
isinstance(cached_result, BaseModel)
or isinstance(cached_result, CustomStreamWrapper)
) and hasattr(cached_result, "_hidden_params"):
cached_result._hidden_params["cache_key"] = cache_key # type: ignore
return CachingHandlerResponse(cached_result=cached_result)
elif (
call_type == CallTypes.aembedding.value
and cached_result is not None
and isinstance(cached_result, list)
and litellm.cache is not None
and not isinstance(
litellm.cache.cache, S3Cache
) # s3 doesn't support bulk writing. Exclude.
):
remaining_list = []
non_null_list = []
for idx, cr in enumerate(cached_result):
if cr is None:
remaining_list.append(kwargs["input"][idx])
else:
non_null_list.append((idx, cr))
original_kwargs_input = kwargs["input"]
kwargs["input"] = remaining_list
if len(non_null_list) > 0:
print_verbose(f"EMBEDDING CACHE HIT! - {len(non_null_list)}")
final_embedding_cached_response = EmbeddingResponse(
model=kwargs.get("model"),
data=[None] * len(original_kwargs_input),
)
final_embedding_cached_response._hidden_params["cache_hit"] = (
True
)
for val in non_null_list:
idx, cr = val # (idx, cr) tuple
if cr is not None:
final_embedding_cached_response.data[idx] = Embedding(
embedding=cr["embedding"],
index=idx,
object="embedding",
)
if len(remaining_list) == 0:
# LOG SUCCESS
cache_hit = True
end_time = datetime.datetime.now()
(
model,
custom_llm_provider,
dynamic_api_key,
api_base,
) = litellm.get_llm_provider(
model=model,
custom_llm_provider=kwargs.get("custom_llm_provider", None),
api_base=kwargs.get("api_base", None),
api_key=kwargs.get("api_key", None),
)
print_verbose(
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
)
logging_obj.update_environment_variables(
model=model,
user=kwargs.get("user", None),
optional_params={},
litellm_params={
"logger_fn": kwargs.get("logger_fn", None),
"acompletion": True,
"metadata": kwargs.get("metadata", {}),
"model_info": kwargs.get("model_info", {}),
"proxy_server_request": kwargs.get(
"proxy_server_request", None
),
"preset_cache_key": kwargs.get(
"preset_cache_key", None
),
"stream_response": kwargs.get("stream_response", {}),
"api_base": "",
},
input=kwargs.get("messages", ""),
api_key=kwargs.get("api_key", None),
original_response=str(final_embedding_cached_response),
additional_args=None,
stream=kwargs.get("stream", False),
)
asyncio.create_task(
logging_obj.async_success_handler(
final_embedding_cached_response,
start_time,
end_time,
cache_hit,
)
)
threading.Thread(
target=logging_obj.success_handler,
args=(
final_embedding_cached_response,
start_time,
end_time,
cache_hit,
),
).start()
return CachingHandlerResponse(
final_embedding_cached_response=final_embedding_cached_response,
embedding_all_elements_cache_hit=True,
)
return CachingHandlerResponse(
cached_result=cached_result,
final_embedding_cached_response=final_embedding_cached_response,
)
async def _async_set_cache(
self,
result: Any,
original_function: Callable,
kwargs: Dict[str, Any],
args: Optional[Tuple[Any, ...]] = None,
):
"""
Internal method to check the type of the result & cache used and adds the result to the cache accordingly
Args:
result: Any:
original_function: Callable:
kwargs: Dict[str, Any]:
args: Optional[Tuple[Any, ...]] = None:
Returns:
None
Raises:
None
"""
args = args or ()
# [OPTIONAL] ADD TO CACHE
if (
(litellm.cache is not None)
and litellm.cache.supported_call_types is not None
and (str(original_function.__name__) in litellm.cache.supported_call_types)
and (kwargs.get("cache", {}).get("no-store", False) is not True)
):
if (
isinstance(result, litellm.ModelResponse)
or isinstance(result, litellm.EmbeddingResponse)
or isinstance(result, TranscriptionResponse)
or isinstance(result, RerankResponse)
):
if (
isinstance(result, EmbeddingResponse)
and isinstance(kwargs["input"], list)
and litellm.cache is not None
and not isinstance(
litellm.cache.cache, S3Cache
) # s3 doesn't support bulk writing. Exclude.
):
asyncio.create_task(
litellm.cache.async_add_cache_pipeline(result, *args, **kwargs)
)
elif isinstance(litellm.cache.cache, S3Cache):
threading.Thread(
target=litellm.cache.add_cache,
args=(result,) + args,
kwargs=kwargs,
).start()
else:
asyncio.create_task(
litellm.cache.async_add_cache(result.json(), *args, **kwargs)
)
else:
asyncio.create_task(
litellm.cache.async_add_cache(result, *args, **kwargs)
)
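
The new `litellm/caching/caching_handler.py` above is consumed by the async client wrapper in `utils.py` (see that diff further below). A condensed sketch of the intended flow; the `cached_call` name and the way the logging object is obtained are illustrative, not the exact wrapper code:

```python
import datetime

from litellm.caching.caching_handler import LLMCachingHandler

_llm_caching_handler = LLMCachingHandler()


async def cached_call(original_function, logging_obj, *args, **kwargs):
    """Sketch: check the cache, call the model on a miss, then write the result back."""
    caching_response = await _llm_caching_handler._async_get_cache(
        model=kwargs.get("model", ""),
        original_function=original_function,
        logging_obj=logging_obj,              # a LiteLLM Logging object supplied by the wrapper
        start_time=datetime.datetime.now(),
        call_type=original_function.__name__,
        kwargs=kwargs,
        args=args,
    )
    if caching_response.embedding_all_elements_cache_hit:
        return caching_response.final_embedding_cached_response  # every embedding input was cached
    if caching_response.cached_result is not None:
        return caching_response.cached_result                    # full cache hit

    result = await original_function(*args, **kwargs)             # cache miss: call the model
    await _llm_caching_handler._async_set_cache(                  # write the fresh result back
        result=result, original_function=original_function, kwargs=kwargs, args=args
    )
    return result
```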

View file

@ -43,7 +43,7 @@
# ### REDIS
# # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
# # print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
# # from litellm.caching import Cache
# # from litellm.caching.caching import Cache
# # litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
# # print("\033[92mLiteLLM: Switched on Redis caching\033[0m")

View file

@ -21,7 +21,7 @@ import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging
import litellm.types
from litellm._logging import verbose_logger, verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.litellm_core_utils.exception_mapping_utils import (
_add_key_name_and_team_to_alert,

View file

@ -13,7 +13,7 @@ import requests
import litellm
from litellm._logging import verbose_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.utils import StandardLoggingPayload
@ -29,14 +29,30 @@ def create_client():
clickhouse_host = os.getenv("CLICKHOUSE_HOST")
if clickhouse_host is not None:
verbose_logger.debug("setting up clickhouse")
port = os.getenv("CLICKHOUSE_PORT")
if port is not None and isinstance(port, str):
port = int(port)
host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
if host is None:
raise ValueError("CLICKHOUSE_HOST is not set")
username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
if username is None:
raise ValueError("CLICKHOUSE_USERNAME is not set")
password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
if password is None:
raise ValueError("CLICKHOUSE_PASSWORD is not set")
if port is None:
raise ValueError("CLICKHOUSE_PORT is not set")
client = clickhouse_connect.get_client(
host=os.getenv("CLICKHOUSE_HOST"),
host=host,
port=port,
username=os.getenv("CLICKHOUSE_USERNAME"),
password=os.getenv("CLICKHOUSE_PASSWORD"),
username=username,
password=password,
)
return client
else:
@ -176,11 +192,29 @@ def _start_clickhouse():
if port is not None and isinstance(port, str):
port = int(port)
port = os.getenv("CLICKHOUSE_PORT")
if port is not None and isinstance(port, str):
port = int(port)
host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
if host is None:
raise ValueError("CLICKHOUSE_HOST is not set")
username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
if username is None:
raise ValueError("CLICKHOUSE_USERNAME is not set")
password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
if password is None:
raise ValueError("CLICKHOUSE_PASSWORD is not set")
if port is None:
raise ValueError("CLICKHOUSE_PORT is not set")
client = clickhouse_connect.get_client(
host=os.getenv("CLICKHOUSE_HOST"),
host=host,
port=port,
username=os.getenv("CLICKHOUSE_USERNAME"),
password=os.getenv("CLICKHOUSE_PASSWORD"),
username=username,
password=password,
)
# view all tables in DB
response = client.query("SHOW TABLES")
@ -241,11 +275,25 @@ class ClickhouseLogger:
if port is not None and isinstance(port, str):
port = int(port)
host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
if host is None:
raise ValueError("CLICKHOUSE_HOST is not set")
username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
if username is None:
raise ValueError("CLICKHOUSE_USERNAME is not set")
password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
if password is None:
raise ValueError("CLICKHOUSE_PASSWORD is not set")
if port is None:
raise ValueError("CLICKHOUSE_PORT is not set")
client = clickhouse_connect.get_client(
host=os.getenv("CLICKHOUSE_HOST"),
host=host,
port=port,
username=os.getenv("CLICKHOUSE_USERNAME"),
password=os.getenv("CLICKHOUSE_PASSWORD"),
username=username,
password=password,
)
self.client = client
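
The ClickHouse hunks above implement the "fix clickhouse pyright" item from the commit message: each environment variable is read into an `Optional[str]` local and checked for `None` before being handed to `clickhouse_connect.get_client`, so the type checker no longer sees `Optional[str]` flowing into parameters that expect `str`. The same check is repeated three times in the diff; a hypothetical helper (not part of this commit) that would express the identical pattern more compactly:

```python
import os


def _require_env(name: str) -> str:
    """Return the variable's value or raise; the non-Optional return type keeps pyright happy."""
    value = os.getenv(name)
    if value is None:
        raise ValueError(f"{name} is not set")
    return value


# usage mirroring the diff (illustrative only, not what the commit actually does):
# client = clickhouse_connect.get_client(
#     host=_require_env("CLICKHOUSE_HOST"),
#     port=int(_require_env("CLICKHOUSE_PORT")),
#     username=_require_env("CLICKHOUSE_USERNAME"),
#     password=_require_env("CLICKHOUSE_PASSWORD"),
# )
```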

View file

@ -8,7 +8,7 @@ from typing import Any, Literal, Optional, Tuple, Union
import dotenv
from pydantic import BaseModel
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.llms.openai import ChatCompletionRequest
from litellm.types.services import ServiceLoggerPayload

View file

@ -23,7 +23,7 @@ from litellm import (
turn_off_message_logging,
verbose_logger,
)
from litellm.caching import DualCache, InMemoryCache, S3Cache
from litellm.caching.caching import DualCache, InMemoryCache, S3Cache
from litellm.cost_calculator import _select_model_name_for_cost_calc
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_logger import CustomLogger

View file

@ -10,7 +10,7 @@ from openai import AsyncAzureOpenAI, AzureOpenAI
from typing_extensions import overload
import litellm
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.utils import EmbeddingResponse

View file

@ -6,7 +6,7 @@ from typing import Dict, List, Optional, Tuple
import httpx
from litellm._logging import verbose_logger
from litellm.caching import DualCache, InMemoryCache
from litellm.caching.caching import DualCache, InMemoryCache
from litellm.secret_managers.main import get_secret
from .base import BaseLLM

View file

@ -29,7 +29,7 @@ import requests # type: ignore
import litellm
from litellm import verbose_logger
from litellm.caching import InMemoryCache
from litellm.caching.caching import InMemoryCache
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import (

View file

@ -8,7 +8,7 @@ from httpx import Response
import litellm
from litellm import verbose_logger
from litellm.caching import InMemoryCache
from litellm.caching.caching import InMemoryCache
from litellm.llms.custom_httpx.http_handler import (
_get_httpx_client,
get_async_httpx_client,

View file

@ -4,7 +4,7 @@ from typing import Callable, List, Literal, Optional, Tuple, Union
import httpx
import litellm
from litellm.caching import Cache
from litellm.caching.caching import Cache
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.llms.OpenAI.openai import AllMessageValues

View file

@ -65,7 +65,7 @@ from litellm.utils import (
)
from ._logging import verbose_logger
from .caching import disable_cache, enable_cache, update_cache
from .caching.caching import disable_cache, enable_cache, update_cache
from .llms import (
aleph_alpha,
baseten,

View file

@ -16,7 +16,7 @@ from pydantic import BaseModel
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
LiteLLM_EndUserTable,
LiteLLM_JWTAuth,

View file

@ -15,7 +15,7 @@ from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable
from litellm.proxy.utils import PrismaClient

View file

@ -5,7 +5,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import RedisCache
from litellm.caching.caching import RedisCache
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
router = APIRouter(

View file

@ -2,7 +2,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata

View file

@ -25,7 +25,7 @@ from fastapi import HTTPException
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.litellm_core_utils.logging_utils import (
convert_litellm_response_object_to_str,

View file

@ -25,7 +25,7 @@ from fastapi import HTTPException
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.litellm_core_utils.logging_utils import (
convert_litellm_response_object_to_str,

View file

@ -2,7 +2,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata

View file

@ -21,7 +21,7 @@ from pydantic import BaseModel
import litellm # noqa: E401
from litellm import get_secret
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.proxy._types import UserAPIKeyAuth
from litellm.utils import (

View file

@ -465,7 +465,7 @@ async def health_readiness():
# check Cache
cache_type = None
if litellm.cache is not None:
from litellm.caching import RedisSemanticCache
from litellm.caching.caching import RedisSemanticCache
cache_type = litellm.cache.type

View file

@ -7,7 +7,7 @@ from fastapi import HTTPException
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth

View file

@ -11,7 +11,7 @@ from fastapi import HTTPException
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth

View file

@ -7,7 +7,7 @@ from fastapi import HTTPException
import litellm
from litellm import verbose_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth

View file

@ -14,7 +14,7 @@ from fastapi import HTTPException
import litellm
from litellm import ModelResponse, Router
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.router import ModelGroupInfo

View file

@ -4,7 +4,7 @@ from fastapi import HTTPException
import litellm
from litellm import verbose_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth

View file

@ -9,7 +9,7 @@ from pydantic import BaseModel
import litellm
from litellm import ModelResponse
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.proxy._types import CurrentItemRateLimit, UserAPIKeyAuth

View file

@ -19,7 +19,7 @@ from fastapi import HTTPException
import litellm # noqa: E401
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.utils import (

View file

@ -18,7 +18,7 @@ from typing_extensions import overload
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.prompt_templates.factory import prompt_injection_detection_default_pt
from litellm.proxy._types import LiteLLMPromptInjectionParams, UserAPIKeyAuth

View file

@ -112,7 +112,7 @@ from litellm import (
RetrieveBatchRequest,
)
from litellm._logging import verbose_proxy_logger, verbose_router_logger
from litellm.caching import DualCache, RedisCache
from litellm.caching.caching import DualCache, RedisCache
from litellm.exceptions import RejectedRequestError
from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
from litellm.litellm_core_utils.core_helpers import (
@ -1554,7 +1554,7 @@ class ProxyConfig:
for key, value in litellm_settings.items():
if key == "cache" and value is True:
print(f"{blue_color_code}\nSetting Cache on Proxy") # noqa
from litellm.caching import Cache
from litellm.caching.caching import Cache
cache_params = {}
if "cache_params" in litellm_settings:

View file

@ -49,7 +49,7 @@ from litellm import (
)
from litellm._logging import verbose_proxy_logger
from litellm._service_logger import ServiceLogging, ServiceTypes
from litellm.caching import DualCache, RedisCache
from litellm.caching.caching import DualCache, RedisCache
from litellm.exceptions import RejectedRequestError
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_logger import CustomLogger

View file

@ -37,7 +37,7 @@ import litellm.litellm_core_utils.exception_mapping_utils
from litellm import get_secret_str
from litellm._logging import verbose_router_logger
from litellm.assistants.main import AssistantDeleted
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc

View file

@ -14,7 +14,7 @@ from typing import Optional
import dotenv # type: ignore
import requests
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger

View file

@ -9,7 +9,7 @@ from pydantic import BaseModel
import litellm
from litellm import ModelResponse, token_counter, verbose_logger
from litellm._logging import verbose_router_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger

View file

@ -9,7 +9,7 @@ from pydantic import BaseModel
import litellm
from litellm import ModelResponse, token_counter, verbose_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger

View file

@ -12,7 +12,7 @@ from pydantic import BaseModel
from litellm import token_counter
from litellm._logging import verbose_router_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.utils import print_verbose

View file

@ -10,7 +10,7 @@ from pydantic import BaseModel
import litellm
from litellm import token_counter
from litellm._logging import verbose_logger, verbose_router_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.types.router import RouterErrors
from litellm.utils import get_utc_datetime, print_verbose

View file

@ -7,7 +7,7 @@ import time
from typing import List, Optional, Tuple, TypedDict
from litellm import verbose_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
class CooldownCacheValue(TypedDict):

View file

@ -1,9 +1,11 @@
import heapq
from pydantic import BaseModel
from typing import Optional
import enum
from litellm.caching import DualCache, RedisCache
import heapq
from typing import Optional
from pydantic import BaseModel
from litellm import print_verbose
from litellm.caching.caching import DualCache, RedisCache
class SchedulerCacheKeys(enum.Enum):

View file

@ -4,7 +4,7 @@ from typing import Optional
import litellm
from litellm._logging import verbose_logger
from litellm.caching import InMemoryCache
from litellm.caching.caching import InMemoryCache
from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
from litellm.llms.custom_httpx.http_handler import _get_httpx_client
from litellm.proxy._types import CommonProxyErrors, KeyManagementSystem

View file

@ -12,7 +12,7 @@ from dotenv import load_dotenv
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.proxy._types import KeyManagementSystem

View file

@ -56,7 +56,10 @@ import litellm._service_logger # for storing API inputs, outputs, and metadata
import litellm.litellm_core_utils
import litellm.litellm_core_utils.audio_utils.utils
import litellm.litellm_core_utils.json_validation_rule
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler
_llm_caching_handler = LLMCachingHandler()
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.exception_mapping_utils import (
@ -146,7 +149,13 @@ from typing import (
from openai import OpenAIError as OriginalError
from ._logging import verbose_logger
from .caching import Cache, QdrantSemanticCache, RedisCache, RedisSemanticCache, S3Cache
from .caching.caching import (
Cache,
QdrantSemanticCache,
RedisCache,
RedisSemanticCache,
S3Cache,
)
from .exceptions import (
APIConnectionError,
APIError,
@ -1121,299 +1130,26 @@ def client(original_function):
print_verbose(
f"ASYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
)
# if caching is false, don't run this
final_embedding_cached_response = None
if (
(kwargs.get("caching", None) is None and litellm.cache is not None)
or kwargs.get("caching", False) is True
) and (
kwargs.get("cache", {}).get("no-cache", False) is not True
): # allow users to control returning cached responses from the completion function
# checking cache
print_verbose("INSIDE CHECKING CACHE")
if (
litellm.cache is not None
and litellm.cache.supported_call_types is not None
and str(original_function.__name__)
in litellm.cache.supported_call_types
):
print_verbose("Checking Cache")
if call_type == CallTypes.aembedding.value and isinstance(
kwargs["input"], list
):
tasks = []
for idx, i in enumerate(kwargs["input"]):
preset_cache_key = litellm.cache.get_cache_key(
*args, **{**kwargs, "input": i}
)
tasks.append(
litellm.cache.async_get_cache(
cache_key=preset_cache_key
)
)
cached_result = await asyncio.gather(*tasks)
## check if cached result is None ##
if cached_result is not None and isinstance(
cached_result, list
):
if len(cached_result) == 1 and cached_result[0] is None:
cached_result = None
elif isinstance(
litellm.cache.cache, RedisSemanticCache
) or isinstance(litellm.cache.cache, RedisCache):
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
)
cached_result = await litellm.cache.async_get_cache(
*args, **kwargs
)
elif isinstance(litellm.cache.cache, QdrantSemanticCache):
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
)
cached_result = await litellm.cache.async_get_cache(
*args, **kwargs
)
else: # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
)
cached_result = litellm.cache.get_cache(*args, **kwargs)
if cached_result is not None and not isinstance(
cached_result, list
):
print_verbose("Cache Hit!", log_level="INFO")
cache_hit = True
end_time = datetime.datetime.now()
(
model,
custom_llm_provider,
dynamic_api_key,
api_base,
) = litellm.get_llm_provider(
_caching_handler_response: CachingHandlerResponse = (
await _llm_caching_handler._async_get_cache(
model=model,
custom_llm_provider=kwargs.get("custom_llm_provider", None),
api_base=kwargs.get("api_base", None),
api_key=kwargs.get("api_key", None),
)
print_verbose(
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
)
logging_obj.update_environment_variables(
model=model,
user=kwargs.get("user", None),
optional_params={},
litellm_params={
"logger_fn": kwargs.get("logger_fn", None),
"acompletion": True,
"metadata": kwargs.get("metadata", {}),
"model_info": kwargs.get("model_info", {}),
"proxy_server_request": kwargs.get(
"proxy_server_request", None
),
"preset_cache_key": kwargs.get(
"preset_cache_key", None
),
"stream_response": kwargs.get("stream_response", {}),
"api_base": kwargs.get("api_base", ""),
},
input=kwargs.get("messages", ""),
api_key=kwargs.get("api_key", None),
original_response=str(cached_result),
additional_args=None,
stream=kwargs.get("stream", False),
)
call_type = original_function.__name__
if call_type == CallTypes.acompletion.value and isinstance(
cached_result, dict
):
if kwargs.get("stream", False) is True:
cached_result = convert_to_streaming_response_async(
response_object=cached_result,
)
cached_result = CustomStreamWrapper(
completion_stream=cached_result,
model=model,
custom_llm_provider="cached_response",
original_function=original_function,
logging_obj=logging_obj,
start_time=start_time,
call_type=call_type,
kwargs=kwargs,
args=args,
)
else:
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=ModelResponse(),
)
if (
call_type == CallTypes.atext_completion.value
and isinstance(cached_result, dict)
_caching_handler_response.cached_result is not None
and _caching_handler_response.final_embedding_cached_response is None
):
if kwargs.get("stream", False) is True:
cached_result = convert_to_streaming_response_async(
response_object=cached_result,
)
cached_result = CustomStreamWrapper(
completion_stream=cached_result,
model=model,
custom_llm_provider="cached_response",
logging_obj=logging_obj,
)
else:
cached_result = TextCompletionResponse(**cached_result)
elif call_type == CallTypes.aembedding.value and isinstance(
cached_result, dict
):
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=EmbeddingResponse(),
response_type="embedding",
)
elif call_type == CallTypes.arerank.value and isinstance(
cached_result, dict
):
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=None,
response_type="rerank",
)
elif call_type == CallTypes.atranscription.value and isinstance(
cached_result, dict
):
hidden_params = {
"model": "whisper-1",
"custom_llm_provider": custom_llm_provider,
"cache_hit": True,
}
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=TranscriptionResponse(),
response_type="audio_transcription",
hidden_params=hidden_params,
)
if kwargs.get("stream", False) is False:
# LOG SUCCESS
asyncio.create_task(
logging_obj.async_success_handler(
cached_result, start_time, end_time, cache_hit
)
)
threading.Thread(
target=logging_obj.success_handler,
args=(cached_result, start_time, end_time, cache_hit),
).start()
cache_key = kwargs.get("preset_cache_key", None)
if (
isinstance(cached_result, BaseModel)
or isinstance(cached_result, CustomStreamWrapper)
) and hasattr(cached_result, "_hidden_params"):
cached_result._hidden_params["cache_key"] = cache_key # type: ignore
return cached_result
elif (
call_type == CallTypes.aembedding.value
and cached_result is not None
and isinstance(cached_result, list)
and litellm.cache is not None
and not isinstance(
litellm.cache.cache, S3Cache
) # s3 doesn't support bulk writing. Exclude.
):
remaining_list = []
non_null_list = []
for idx, cr in enumerate(cached_result):
if cr is None:
remaining_list.append(kwargs["input"][idx])
else:
non_null_list.append((idx, cr))
original_kwargs_input = kwargs["input"]
kwargs["input"] = remaining_list
if len(non_null_list) > 0:
print_verbose(
f"EMBEDDING CACHE HIT! - {len(non_null_list)}"
)
final_embedding_cached_response = EmbeddingResponse(
model=kwargs.get("model"),
data=[None] * len(original_kwargs_input),
)
final_embedding_cached_response._hidden_params[
"cache_hit"
] = True
return _caching_handler_response.cached_result
elif _caching_handler_response.embedding_all_elements_cache_hit is True:
return _caching_handler_response.final_embedding_cached_response
for val in non_null_list:
idx, cr = val # (idx, cr) tuple
if cr is not None:
final_embedding_cached_response.data[idx] = (
Embedding(
embedding=cr["embedding"],
index=idx,
object="embedding",
)
)
if len(remaining_list) == 0:
# LOG SUCCESS
cache_hit = True
end_time = datetime.datetime.now()
(
model,
custom_llm_provider,
dynamic_api_key,
api_base,
) = litellm.get_llm_provider(
model=model,
custom_llm_provider=kwargs.get(
"custom_llm_provider", None
),
api_base=kwargs.get("api_base", None),
api_key=kwargs.get("api_key", None),
)
print_verbose(
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
)
logging_obj.update_environment_variables(
model=model,
user=kwargs.get("user", None),
optional_params={},
litellm_params={
"logger_fn": kwargs.get("logger_fn", None),
"acompletion": True,
"metadata": kwargs.get("metadata", {}),
"model_info": kwargs.get("model_info", {}),
"proxy_server_request": kwargs.get(
"proxy_server_request", None
),
"preset_cache_key": kwargs.get(
"preset_cache_key", None
),
"stream_response": kwargs.get(
"stream_response", {}
),
"api_base": "",
},
input=kwargs.get("messages", ""),
api_key=kwargs.get("api_key", None),
original_response=str(final_embedding_cached_response),
additional_args=None,
stream=kwargs.get("stream", False),
)
asyncio.create_task(
logging_obj.async_success_handler(
final_embedding_cached_response,
start_time,
end_time,
cache_hit,
)
)
threading.Thread(
target=logging_obj.success_handler,
args=(
final_embedding_cached_response,
start_time,
end_time,
cache_hit,
),
).start()
return final_embedding_cached_response
# MODEL CALL
result = await original_function(*args, **kwargs)
end_time = datetime.datetime.now()
@ -1467,51 +1203,14 @@ def client(original_function):
original_response=result, model=model, optional_params=kwargs
)
# [OPTIONAL] ADD TO CACHE
if (
(litellm.cache is not None)
and litellm.cache.supported_call_types is not None
and (
str(original_function.__name__)
in litellm.cache.supported_call_types
)
and (kwargs.get("cache", {}).get("no-store", False) is not True)
):
if (
isinstance(result, litellm.ModelResponse)
or isinstance(result, litellm.EmbeddingResponse)
or isinstance(result, TranscriptionResponse)
or isinstance(result, RerankResponse)
):
if (
isinstance(result, EmbeddingResponse)
and isinstance(kwargs["input"], list)
and litellm.cache is not None
and not isinstance(
litellm.cache.cache, S3Cache
) # s3 doesn't support bulk writing. Exclude.
):
asyncio.create_task(
litellm.cache.async_add_cache_pipeline(
result, *args, **kwargs
)
)
elif isinstance(litellm.cache.cache, S3Cache):
threading.Thread(
target=litellm.cache.add_cache,
args=(result,) + args,
## Add response to cache
await _llm_caching_handler._async_set_cache(
result=result,
original_function=original_function,
kwargs=kwargs,
).start()
else:
asyncio.create_task(
litellm.cache.async_add_cache(
result.json(), *args, **kwargs
)
)
else:
asyncio.create_task(
litellm.cache.async_add_cache(result, *args, **kwargs)
args=args,
)
# LOG SUCCESS - handle streaming success logging in the _next_ object
print_verbose(
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
@ -1528,24 +1227,32 @@ def client(original_function):
# REBUILD EMBEDDING CACHING
if (
isinstance(result, EmbeddingResponse)
and final_embedding_cached_response is not None
and final_embedding_cached_response.data is not None
and _caching_handler_response.final_embedding_cached_response
is not None
and _caching_handler_response.final_embedding_cached_response.data
is not None
):
idx = 0
final_data_list = []
for item in final_embedding_cached_response.data:
for (
item
) in _caching_handler_response.final_embedding_cached_response.data:
if item is None and result.data is not None:
final_data_list.append(result.data[idx])
idx += 1
else:
final_data_list.append(item)
final_embedding_cached_response.data = final_data_list
final_embedding_cached_response._hidden_params["cache_hit"] = True
final_embedding_cached_response._response_ms = (
_caching_handler_response.final_embedding_cached_response.data = (
final_data_list
)
_caching_handler_response.final_embedding_cached_response._hidden_params[
"cache_hit"
] = True
_caching_handler_response.final_embedding_cached_response._response_ms = (
end_time - start_time
).total_seconds() * 1000
return final_embedding_cached_response
return _caching_handler_response.final_embedding_cached_response
return result
except Exception as e:
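
The final hunk above handles partial embedding cache hits: the cached response carries a data list with `None` placeholders for the inputs that missed, and the wrapper fills those slots from the fresh API result so the combined response preserves the original input order. A standalone sketch of that merge with made-up data:

```python
# cached entries for inputs 0..2; None marks a cache miss (illustrative data)
cached_data = [None, {"embedding": [0.1, 0.2], "index": 1}, None]
# the API was only called for the two misses, in their original order
fresh_data = [{"embedding": [0.3, 0.4], "index": 0}, {"embedding": [0.5, 0.6], "index": 2}]

final_data, fresh_iter = [], iter(fresh_data)
for item in cached_data:
    final_data.append(next(fresh_iter) if item is None else item)

assert [d["index"] for d in final_data] == [0, 1, 2]  # one entry per input, original order
```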

View file

@ -19,7 +19,7 @@ from litellm._logging import verbose_proxy_logger
from litellm.proxy.utils import PrismaClient, ProxyLogging
verbose_proxy_logger.setLevel(level=logging.DEBUG)
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.router import (
Deployment,
updateDeployment,

View file

@ -28,7 +28,7 @@ import pytest
from openai import APIError
import litellm
from litellm.caching import DualCache, RedisCache
from litellm.caching.caching import DualCache, RedisCache
from litellm.integrations.SlackAlerting.slack_alerting import (
DeploymentMetrics,
SlackAlerting,

View file

@ -13,7 +13,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
import pytest, litellm
from litellm.proxy.auth.auth_checks import get_end_user_object
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import LiteLLM_EndUserTable, LiteLLM_BudgetTable
from litellm.proxy.utils import PrismaClient

View file

@ -21,7 +21,7 @@ import pytest
import litellm
from litellm import Router, mock_completion
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.utils import ProxyLogging

View file

@ -21,7 +21,7 @@ from litellm.proxy.enterprise.enterprise_hooks.banned_keywords import (
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging, hash_token
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
@pytest.mark.asyncio

View file

@ -27,7 +27,7 @@ import pytest
import litellm
from litellm import Router, mock_completion
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.enterprise.enterprise_hooks.blocked_user_list import (
_ENTERPRISE_BlockedUserList,
@ -56,7 +56,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
from starlette.datastructures import URL
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
BlockUsers,
DynamoDBArgs,

View file

@ -21,7 +21,7 @@ import pytest
import litellm
from litellm import aembedding, completion, embedding
from litellm.caching import Cache
from litellm.caching.caching import Cache
from unittest.mock import AsyncMock, patch, MagicMock
import datetime
@ -52,7 +52,7 @@ async def test_dual_cache_async_batch_get_cache():
- hit redis for the other -> expect to return None
- expect result = [in_memory_result, None]
"""
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
in_memory_cache = InMemoryCache()
redis_cache = RedisCache() # get credentials from environment
@ -74,7 +74,7 @@ def test_dual_cache_batch_get_cache():
- hit redis for the other -> expect to return None
- expect result = [in_memory_result, None]
"""
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
in_memory_cache = InMemoryCache()
redis_cache = RedisCache() # get credentials from environment
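Both docstrings above pin down the dual-cache batch lookup contract: a key present only in the in-memory layer is served from memory, and a key absent from both layers comes back as None, giving [in_memory_result, None]. A minimal two-tier sketch of that behavior (a toy class, not litellm's DualCache API) is:

```python
from typing import Any, Dict, List, Optional


class TwoTierCache:
    """Toy in-memory + 'remote' two-tier cache illustrating batch gets."""

    def __init__(self) -> None:
        self.local: Dict[str, Any] = {}
        self.remote: Dict[str, Any] = {}

    def batch_get(self, keys: List[str]) -> List[Optional[Any]]:
        results: List[Optional[Any]] = []
        for key in keys:
            if key in self.local:        # fast path: in-memory hit
                results.append(self.local[key])
            elif key in self.remote:     # fall through to the remote layer
                value = self.remote[key]
                self.local[key] = value  # backfill the in-memory layer
                results.append(value)
            else:
                results.append(None)     # miss in both layers
        return results


cache = TwoTierCache()
cache.local["key-a"] = 1
assert cache.batch_get(["key-a", "key-b"]) == [1, None]  # [in_memory_result, None]
```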
@ -520,6 +520,7 @@ async def test_embedding_caching_azure_individual_items_reordered():
assert embedding_val_1[0]["id"] == embedding_val_2[0]["id"]
```
"""
litellm.set_verbose = True
litellm.cache = Cache()
common_msg = f"{uuid.uuid4()}"
common_msg_2 = f"hey how's it going {uuid.uuid4()}"
@ -532,9 +533,11 @@ async def test_embedding_caching_azure_individual_items_reordered():
embedding_val_1 = await aembedding(
model="azure/azure-embedding-model", input=embedding_1, caching=True
)
print("embedding val 1", embedding_val_1)
embedding_val_2 = await aembedding(
model="azure/azure-embedding-model", input=embedding_2, caching=True
)
print("embedding val 2", embedding_val_2)
print(f"embedding_val_2._hidden_params: {embedding_val_2._hidden_params}")
assert embedding_val_2._hidden_params["cache_hit"] == True
@ -866,7 +869,7 @@ async def test_redis_cache_cluster_init_unit_test():
from redis.asyncio import RedisCluster as AsyncRedisCluster
from redis.cluster import RedisCluster
from litellm.caching import RedisCache
from litellm.caching.caching import RedisCache
litellm.set_verbose = True
@ -900,7 +903,7 @@ async def test_redis_cache_cluster_init_with_env_vars_unit_test():
from redis.asyncio import RedisCluster as AsyncRedisCluster
from redis.cluster import RedisCluster
from litellm.caching import RedisCache
from litellm.caching.caching import RedisCache
litellm.set_verbose = True
@ -1554,7 +1557,7 @@ def test_custom_redis_cache_params():
def test_get_cache_key():
from litellm.caching import Cache
from litellm.caching.caching import Cache
try:
print("Testing get_cache_key")
@ -1989,7 +1992,7 @@ async def test_cache_default_off_acompletion():
verbose_logger.setLevel(logging.DEBUG)
from litellm.caching import CacheMode
from litellm.caching.caching import CacheMode
random_number = random.randint(
1, 100000
@ -2072,7 +2075,7 @@ async def test_dual_cache_uses_redis():
- Assert that value from redis is used
"""
litellm.set_verbose = True
from litellm.caching import DualCache, RedisCache
from litellm.caching.caching import DualCache, RedisCache
current_usage = uuid.uuid4()
@ -2095,7 +2098,7 @@ async def test_proxy_logging_setup():
"""
Assert always_read_redis is True when used by internal usage cache
"""
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy.utils import ProxyLogging
pl_obj = ProxyLogging(user_api_key_cache=DualCache())
@ -2165,7 +2168,7 @@ async def test_redis_proxy_batch_redis_get_cache():
- make 2nd call -> expect hit
"""
from litellm.caching import Cache, DualCache
from litellm.caching.caching import Cache, DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.batch_redis_get import _PROXY_BatchRedisRequests

View file

@ -15,7 +15,7 @@ sys.path.insert(
import pytest
import litellm
from litellm import embedding, completion, Router
from litellm.caching import Cache
from litellm.caching.caching import Cache
messages = [{"role": "user", "content": f"who is ishaan {time.time()}"}]

View file

@ -151,7 +151,7 @@ async def test_datadog_log_redis_failures():
Test that poorly configured Redis is logged as Warning on DataDog
"""
try:
from litellm.caching import Cache
from litellm.caching.caching import Cache
from litellm.integrations.datadog.datadog import DataDogLogger
litellm.cache = Cache(

View file

@ -24,7 +24,7 @@ import pytest
from fastapi import Request
import litellm
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable, LiteLLMRoutes
from litellm.proxy.auth.handle_jwt import JWTHandler
from litellm.proxy.management_endpoints.team_endpoints import new_team

View file

@ -89,7 +89,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
from starlette.datastructures import URL
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
DynamoDBArgs,
GenerateKeyRequest,
@ -1444,7 +1444,7 @@ def test_call_with_key_over_budget(prisma_client):
# update spend using track_cost callback, make 2nd request, it should fail
from litellm import Choices, Message, ModelResponse, Usage
from litellm.caching import Cache
from litellm.caching.caching import Cache
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)
@ -1564,7 +1564,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)
from litellm import Choices, Message, ModelResponse, Usage
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache()
import time
@ -1685,7 +1685,7 @@ def test_call_with_key_over_model_budget(prisma_client):
# update spend using track_cost callback, make 2nd request, it should fail
from litellm import Choices, Message, ModelResponse, Usage
from litellm.caching import Cache
from litellm.caching.caching import Cache
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)

View file

@ -25,7 +25,7 @@ import pytest
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation
from litellm.proxy.proxy_server import embeddings

View file

@ -20,7 +20,7 @@ import pytest
import litellm
from litellm import Router
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
### UNIT TESTS FOR LEAST BUSY LOGGING ###

View file

@ -20,7 +20,7 @@ from litellm.proxy.enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMG
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging, hash_token
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
### UNIT TESTS FOR LLM GUARD ###

View file

@ -10,7 +10,7 @@
# import asyncio
# from litellm import Router, Timeout
# import time
# from litellm.caching import Cache
# from litellm.caching.caching import Cache
# import litellm
# litellm.cache = Cache(

View file

@ -15,7 +15,7 @@ sys.path.insert(
import pytest
from litellm import Router
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
### UNIT TESTS FOR cost ROUTING ###

View file

@ -22,7 +22,7 @@ import pytest
import litellm
from litellm import Router
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
### UNIT TESTS FOR LATENCY ROUTING ###

View file

@ -19,7 +19,7 @@
# from litellm import Router
# from litellm.proxy.utils import ProxyLogging, hash_token
# from litellm.proxy._types import UserAPIKeyAuth
# from litellm.caching import DualCache, RedisCache
# from litellm.caching.caching import DualCache, RedisCache
# from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
# from datetime import datetime

View file

@ -22,7 +22,7 @@ from litellm.proxy.enterprise.enterprise_hooks.openai_moderation import (
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging, hash_token
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
### UNIT TESTS FOR OpenAI Moderation ###

View file

@ -23,7 +23,7 @@ import pytest
import litellm
from litellm import Router
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.parallel_request_limiter import (
_PROXY_MaxParallelRequestsHandler as MaxParallelRequestsHandler,

View file

@ -22,7 +22,7 @@ import pytest
import litellm
from litellm import Router, mock_completion
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
from litellm.proxy.utils import ProxyLogging

View file

@ -67,7 +67,7 @@ async def test_completion_with_caching_bad_call():
litellm.set_verbose = True
try:
from litellm.caching import RedisCache
from litellm.caching.caching import RedisCache
litellm.service_callback = ["prometheus_system"]
sl = ServiceLogging(mock_testing=True)

View file

@ -20,7 +20,7 @@ from litellm.proxy.hooks.prompt_injection_detection import (
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging
from litellm.proxy._types import UserAPIKeyAuth, LiteLLMPromptInjectionParams
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
@pytest.mark.asyncio

View file

@ -31,7 +31,7 @@ from starlette.datastructures import URL
import litellm
from litellm import Router, mock_completion
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (

View file

@ -745,7 +745,7 @@ async def test_team_update_redis():
"""
Tests if team update, updates the redis cache if set
"""
from litellm.caching import DualCache, RedisCache
from litellm.caching.caching import DualCache, RedisCache
from litellm.proxy._types import LiteLLM_TeamTableCachedObj
from litellm.proxy.auth.auth_checks import _cache_team_object
@ -775,7 +775,7 @@ async def test_get_team_redis(client_no_auth):
"""
Tests if get_team_object gets value from redis cache, if set
"""
from litellm.caching import DualCache, RedisCache
from litellm.caching.caching import DualCache, RedisCache
from litellm.proxy.auth.auth_checks import get_team_object
proxy_logging_obj: ProxyLogging = getattr(

View file

@ -26,7 +26,7 @@ from starlette.datastructures import URL
import litellm
from litellm import Router, mock_completion
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (

View file

@ -3128,7 +3128,7 @@ async def test_azure_astreaming_and_function_calling():
"content": f"What is the weather like in Boston? {uuid.uuid4()}",
}
]
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.cache = Cache(
type="redis",

View file

@ -23,7 +23,7 @@ import pytest
import litellm
from litellm import Router
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.router_strategy.lowest_tpm_rpm_v2 import (
LowestTPMLoggingHandler_v2 as LowestTPMLoggingHandler,
)

View file

@ -27,7 +27,7 @@ import pytest
import litellm
from litellm import Router, mock_completion
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.management_endpoints.internal_user_endpoints import (
new_user,
@ -53,7 +53,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
from starlette.datastructures import URL
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
BlockUsers,
DynamoDBArgs,

View file

@ -157,7 +157,7 @@ async def test_transcription_on_router():
@pytest.mark.asyncio()
async def test_transcription_caching():
import litellm
from litellm.caching import Cache
from litellm.caching.caching import Cache
litellm.set_verbose = True
litellm.cache = Cache()

View file

@ -71,7 +71,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
from starlette.datastructures import URL
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
DynamoDBArgs,
GenerateKeyRequest,

View file

@ -78,7 +78,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
from starlette.datastructures import URL
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import *
proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())

View file

@ -17,7 +17,7 @@ from litellm.proxy._types import LitellmUserRoles
import os
import jwt
import time
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())

View file

@ -85,7 +85,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
from starlette.datastructures import URL
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
DynamoDBArgs,
GenerateKeyRequest,