(refactor) caching: use LLMCachingHandler for async_get_cache and set_cache (#6208)

* use folder for caching

* fix importing caching

* fix clickhouse pyright

* fix linting

* fix correctly pass kwargs and args

* fix test case for embedding

* fix linting

* fix embedding caching logic

* fix refactor handle utils.py

* fix test_embedding_caching_azure_individual_items_reordered
This commit is contained in:
Ishaan Jaff 2024-10-14 16:34:01 +05:30 committed by GitHub
parent 20e50d7002
commit 4d1b4beb3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
96 changed files with 690 additions and 489 deletions

View file

@@ -89,7 +89,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
from litellm.proxy._types import (
DynamoDBArgs,
GenerateKeyRequest,
@@ -1444,7 +1444,7 @@ def test_call_with_key_over_budget(prisma_client):
# update spend using track_cost callback, make 2nd request, it should fail
from litellm import Choices, Message, ModelResponse, Usage
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)
@@ -1564,7 +1564,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)
from litellm import Choices, Message, ModelResponse, Usage
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
litellm.cache = Cache()
import time
@@ -1685,7 +1685,7 @@ def test_call_with_key_over_model_budget(prisma_client):
# update spend using track_cost callback, make 2nd request, it should fail
from litellm import Choices, Message, ModelResponse, Usage
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)