(refactor) caching use LLMCachingHandler for async_get_cache and set_cache (#6208)

* use folder for caching

* fix importing caching

* fix clickhouse pyright

* fix linting

* fix: correctly pass kwargs and args

* fix test case for embedding

* fix linting

* fix embedding caching logic

* fix: refactor handling in utils.py

* fix test_embedding_caching_azure_individual_items_reordered
Ishaan Jaff 2024-10-14 16:34:01 +05:30 committed by GitHub
parent 20e50d7002
commit 4d1b4beb3d
96 changed files with 690 additions and 489 deletions
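
The bulk of this diff is mechanical: litellm.caching became a package folder, so every "from litellm.caching import ..." is rewritten to import from the litellm.caching.caching module. A minimal sketch of the migration, using only class names that appear in the hunks below (call sites are unchanged):

import litellm

# Old import path, removed across these files:
#   from litellm.caching import Cache, DualCache, InMemoryCache, RedisCache

# New import path -- the caching module now lives inside a caching/ folder:
from litellm.caching.caching import Cache, DualCache, InMemoryCache, RedisCache

# Usage is untouched; only the import location moved.
litellm.cache = Cache()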


@@ -19,7 +19,7 @@ from litellm._logging import verbose_proxy_logger
 from litellm.proxy.utils import PrismaClient, ProxyLogging
 verbose_proxy_logger.setLevel(level=logging.DEBUG)
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router import (
     Deployment,
     updateDeployment,


@@ -28,7 +28,7 @@ import pytest
 from openai import APIError
 import litellm
-from litellm.caching import DualCache, RedisCache
+from litellm.caching.caching import DualCache, RedisCache
 from litellm.integrations.SlackAlerting.slack_alerting import (
     DeploymentMetrics,
     SlackAlerting,


@@ -13,7 +13,7 @@ sys.path.insert(
 ) # Adds the parent directory to the system path
 import pytest, litellm
 from litellm.proxy.auth.auth_checks import get_end_user_object
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import LiteLLM_EndUserTable, LiteLLM_BudgetTable
 from litellm.proxy.utils import PrismaClient


@@ -21,7 +21,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.utils import ProxyLogging


@@ -21,7 +21,7 @@ from litellm.proxy.enterprise.enterprise_hooks.banned_keywords import (
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging, hash_token
 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 @pytest.mark.asyncio


@@ -27,7 +27,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.enterprise.enterprise_hooks.blocked_user_list import (
     _ENTERPRISE_BlockedUserList,
@@ -56,7 +56,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     BlockUsers,
     DynamoDBArgs,


@@ -21,7 +21,7 @@ import pytest
 import litellm
 from litellm import aembedding, completion, embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 from unittest.mock import AsyncMock, patch, MagicMock
 import datetime
@@ -52,7 +52,7 @@ async def test_dual_cache_async_batch_get_cache():
     - hit redis for the other -> expect to return None
     - expect result = [in_memory_result, None]
     """
-    from litellm.caching import DualCache, InMemoryCache, RedisCache
+    from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
     in_memory_cache = InMemoryCache()
     redis_cache = RedisCache() # get credentials from environment
@@ -74,7 +74,7 @@ def test_dual_cache_batch_get_cache():
     - hit redis for the other -> expect to return None
     - expect result = [in_memory_result, None]
     """
-    from litellm.caching import DualCache, InMemoryCache, RedisCache
+    from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
     in_memory_cache = InMemoryCache()
     redis_cache = RedisCache() # get credentials from environment
@@ -520,6 +520,7 @@ async def test_embedding_caching_azure_individual_items_reordered():
     assert embedding_val_1[0]["id"] == embedding_val_2[0]["id"]
     ```
     """
+    litellm.set_verbose = True
    litellm.cache = Cache()
     common_msg = f"{uuid.uuid4()}"
     common_msg_2 = f"hey how's it going {uuid.uuid4()}"
@@ -532,9 +533,11 @@ async def test_embedding_caching_azure_individual_items_reordered():
     embedding_val_1 = await aembedding(
         model="azure/azure-embedding-model", input=embedding_1, caching=True
     )
+    print("embedding val 1", embedding_val_1)
     embedding_val_2 = await aembedding(
         model="azure/azure-embedding-model", input=embedding_2, caching=True
     )
+    print("embedding val 2", embedding_val_2)
     print(f"embedding_val_2._hidden_params: {embedding_val_2._hidden_params}")
     assert embedding_val_2._hidden_params["cache_hit"] == True
@@ -866,7 +869,7 @@ async def test_redis_cache_cluster_init_unit_test():
     from redis.asyncio import RedisCluster as AsyncRedisCluster
     from redis.cluster import RedisCluster
-    from litellm.caching import RedisCache
+    from litellm.caching.caching import RedisCache
     litellm.set_verbose = True
@@ -900,7 +903,7 @@ async def test_redis_cache_cluster_init_with_env_vars_unit_test():
     from redis.asyncio import RedisCluster as AsyncRedisCluster
     from redis.cluster import RedisCluster
-    from litellm.caching import RedisCache
+    from litellm.caching.caching import RedisCache
     litellm.set_verbose = True
@@ -1554,7 +1557,7 @@ def test_custom_redis_cache_params():
 def test_get_cache_key():
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     try:
         print("Testing get_cache_key")
@@ -1989,7 +1992,7 @@ async def test_cache_default_off_acompletion():
     verbose_logger.setLevel(logging.DEBUG)
-    from litellm.caching import CacheMode
+    from litellm.caching.caching import CacheMode
     random_number = random.randint(
         1, 100000
@@ -2072,7 +2075,7 @@ async def test_dual_cache_uses_redis():
     - Assert that value from redis is used
     """
     litellm.set_verbose = True
-    from litellm.caching import DualCache, RedisCache
+    from litellm.caching.caching import DualCache, RedisCache
     current_usage = uuid.uuid4()
@@ -2095,7 +2098,7 @@ async def test_proxy_logging_setup():
     """
     Assert always_read_redis is True when used by internal usage cache
     """
-    from litellm.caching import DualCache
+    from litellm.caching.caching import DualCache
     from litellm.proxy.utils import ProxyLogging
     pl_obj = ProxyLogging(user_api_key_cache=DualCache())
@@ -2165,7 +2168,7 @@ async def test_redis_proxy_batch_redis_get_cache():
     - make 2nd call -> expect hit
     """
-    from litellm.caching import Cache, DualCache
+    from litellm.caching.caching import Cache, DualCache
     from litellm.proxy._types import UserAPIKeyAuth
     from litellm.proxy.hooks.batch_redis_get import _PROXY_BatchRedisRequests


@@ -15,7 +15,7 @@ sys.path.insert(
 import pytest
 import litellm
 from litellm import embedding, completion, Router
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 messages = [{"role": "user", "content": f"who is ishaan {time.time()}"}]


@@ -151,7 +151,7 @@ async def test_datadog_log_redis_failures():
     Test that poorly configured Redis is logged as Warning on DataDog
     """
     try:
-        from litellm.caching import Cache
+        from litellm.caching.caching import Cache
         from litellm.integrations.datadog.datadog import DataDogLogger
         litellm.cache = Cache(


@@ -24,7 +24,7 @@ import pytest
 from fastapi import Request
 import litellm
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable, LiteLLMRoutes
 from litellm.proxy.auth.handle_jwt import JWTHandler
 from litellm.proxy.management_endpoints.team_endpoints import new_team


@@ -89,7 +89,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     DynamoDBArgs,
     GenerateKeyRequest,
@@ -1444,7 +1444,7 @@ def test_call_with_key_over_budget(prisma_client):
     # update spend using track_cost callback, make 2nd request, it should fail
     from litellm import Choices, Message, ModelResponse, Usage
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )
@@ -1564,7 +1564,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
     setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)
     from litellm import Choices, Message, ModelResponse, Usage
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     litellm.cache = Cache()
     import time
@@ -1685,7 +1685,7 @@ def test_call_with_key_over_model_budget(prisma_client):
     # update spend using track_cost callback, make 2nd request, it should fail
     from litellm import Choices, Message, ModelResponse, Usage
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )


@@ -25,7 +25,7 @@ import pytest
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation
 from litellm.proxy.proxy_server import embeddings


@@ -20,7 +20,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 ### UNIT TESTS FOR LEAST BUSY LOGGING ###


@@ -20,7 +20,7 @@ from litellm.proxy.enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMG
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging, hash_token
 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 ### UNIT TESTS FOR LLM GUARD ###


@@ -10,7 +10,7 @@
 # import asyncio
 # from litellm import Router, Timeout
 # import time
-# from litellm.caching import Cache
+# from litellm.caching.caching import Cache
 # import litellm
 # litellm.cache = Cache(


@@ -15,7 +15,7 @@ sys.path.insert(
 import pytest
 from litellm import Router
 from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 ### UNIT TESTS FOR cost ROUTING ###


@@ -22,7 +22,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
 ### UNIT TESTS FOR LATENCY ROUTING ###


@@ -19,7 +19,7 @@
 # from litellm import Router
 # from litellm.proxy.utils import ProxyLogging, hash_token
 # from litellm.proxy._types import UserAPIKeyAuth
-# from litellm.caching import DualCache, RedisCache
+# from litellm.caching.caching import DualCache, RedisCache
 # from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
 # from datetime import datetime


@@ -22,7 +22,7 @@ from litellm.proxy.enterprise.enterprise_hooks.openai_moderation import (
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging, hash_token
 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 ### UNIT TESTS FOR OpenAI Moderation ###


@@ -23,7 +23,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler as MaxParallelRequestsHandler,


@@ -22,7 +22,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
 from litellm.proxy.utils import ProxyLogging


@@ -67,7 +67,7 @@ async def test_completion_with_caching_bad_call():
     litellm.set_verbose = True
     try:
-        from litellm.caching import RedisCache
+        from litellm.caching.caching import RedisCache
         litellm.service_callback = ["prometheus_system"]
         sl = ServiceLogging(mock_testing=True)


@@ -20,7 +20,7 @@ from litellm.proxy.hooks.prompt_injection_detection import (
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging
 from litellm.proxy._types import UserAPIKeyAuth, LiteLLMPromptInjectionParams
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 @pytest.mark.asyncio


@@ -31,7 +31,7 @@ from starlette.datastructures import URL
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (


@@ -745,7 +745,7 @@ async def test_team_update_redis():
     """
     Tests if team update, updates the redis cache if set
     """
-    from litellm.caching import DualCache, RedisCache
+    from litellm.caching.caching import DualCache, RedisCache
     from litellm.proxy._types import LiteLLM_TeamTableCachedObj
     from litellm.proxy.auth.auth_checks import _cache_team_object
@@ -775,7 +775,7 @@ async def test_get_team_redis(client_no_auth):
     """
     Tests if get_team_object gets value from redis cache, if set
     """
-    from litellm.caching import DualCache, RedisCache
+    from litellm.caching.caching import DualCache, RedisCache
     from litellm.proxy.auth.auth_checks import get_team_object
     proxy_logging_obj: ProxyLogging = getattr(


@@ -26,7 +26,7 @@ from starlette.datastructures import URL
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (


@@ -3128,7 +3128,7 @@ async def test_azure_astreaming_and_function_calling():
             "content": f"What is the weather like in Boston? {uuid.uuid4()}",
         }
     ]
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     litellm.cache = Cache(
         type="redis",


@@ -23,7 +23,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router_strategy.lowest_tpm_rpm_v2 import (
     LowestTPMLoggingHandler_v2 as LowestTPMLoggingHandler,
 )


@@ -27,7 +27,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.management_endpoints.internal_user_endpoints import (
     new_user,
@@ -53,7 +53,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     BlockUsers,
     DynamoDBArgs,


@@ -157,7 +157,7 @@ async def test_transcription_on_router():
 @pytest.mark.asyncio()
 async def test_transcription_caching():
     import litellm
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     litellm.set_verbose = True
     litellm.cache = Cache()


@@ -71,7 +71,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     DynamoDBArgs,
     GenerateKeyRequest,


@@ -78,7 +78,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import *
 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())


@@ -17,7 +17,7 @@ from litellm.proxy._types import LitellmUserRoles
 import os
 import jwt
 import time
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())


@@ -85,7 +85,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     DynamoDBArgs,
     GenerateKeyRequest,