(refactor) caching use LLMCachingHandler for async_get_cache and set_cache (#6208)

* use folder for caching

* fix importing caching

* fix clickhouse pyright

* fix linting

* fix: correctly pass kwargs and args

* fix test case for embedding

* fix linting

* fix embedding caching logic

* fix: refactor handling in utils.py

* fix test_embedding_caching_azure_individual_items_reordered
Ishaan Jaff 2024-10-14 16:34:01 +05:30 committed by GitHub
parent 20e50d7002
commit 4d1b4beb3d
96 changed files with 690 additions and 489 deletions
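
The bulk of this diff is mechanical: litellm.caching became a package folder, so every "from litellm.caching import ..." is rewritten to import from the litellm.caching.caching module. A minimal sketch of the migration, using only class names that appear in the hunks below (call sites are unchanged):

import litellm

# Old import path, removed across these files:
#   from litellm.caching import Cache, DualCache, InMemoryCache, RedisCache

# New import path -- the caching module now lives inside a caching/ folder:
from litellm.caching.caching import Cache, DualCache, InMemoryCache, RedisCache

# Usage is untouched; only the import location moved.
litellm.cache = Cache()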


@@ -19,7 +19,7 @@ from litellm._logging import verbose_proxy_logger
 from litellm.proxy.utils import PrismaClient, ProxyLogging
 verbose_proxy_logger.setLevel(level=logging.DEBUG)
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router import (
     Deployment,
     updateDeployment,


@@ -28,7 +28,7 @@ import pytest
 from openai import APIError
 import litellm
-from litellm.caching import DualCache, RedisCache
+from litellm.caching.caching import DualCache, RedisCache
 from litellm.integrations.SlackAlerting.slack_alerting import (
     DeploymentMetrics,
     SlackAlerting,


@@ -13,7 +13,7 @@ sys.path.insert(
 ) # Adds the parent directory to the system path
 import pytest, litellm
 from litellm.proxy.auth.auth_checks import get_end_user_object
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import LiteLLM_EndUserTable, LiteLLM_BudgetTable
 from litellm.proxy.utils import PrismaClient


@@ -21,7 +21,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.utils import ProxyLogging


@@ -21,7 +21,7 @@ from litellm.proxy.enterprise.enterprise_hooks.banned_keywords import (
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging, hash_token
 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 @pytest.mark.asyncio


@@ -27,7 +27,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.enterprise.enterprise_hooks.blocked_user_list import (
     _ENTERPRISE_BlockedUserList,
@@ -56,7 +56,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     BlockUsers,
     DynamoDBArgs,


@@ -21,7 +21,7 @@ import pytest
 import litellm
 from litellm import aembedding, completion, embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 from unittest.mock import AsyncMock, patch, MagicMock
 import datetime
@@ -52,7 +52,7 @@ async def test_dual_cache_async_batch_get_cache():
     - hit redis for the other -> expect to return None
     - expect result = [in_memory_result, None]
     """
-    from litellm.caching import DualCache, InMemoryCache, RedisCache
+    from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
     in_memory_cache = InMemoryCache()
     redis_cache = RedisCache() # get credentials from environment
@@ -74,7 +74,7 @@ def test_dual_cache_batch_get_cache():
     - hit redis for the other -> expect to return None
     - expect result = [in_memory_result, None]
     """
-    from litellm.caching import DualCache, InMemoryCache, RedisCache
+    from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
     in_memory_cache = InMemoryCache()
     redis_cache = RedisCache() # get credentials from environment
@@ -520,6 +520,7 @@ async def test_embedding_caching_azure_individual_items_reordered():
     assert embedding_val_1[0]["id"] == embedding_val_2[0]["id"]
     ```
     """
+    litellm.set_verbose = True
    litellm.cache = Cache()
     common_msg = f"{uuid.uuid4()}"
     common_msg_2 = f"hey how's it going {uuid.uuid4()}"
@@ -532,9 +533,11 @@ async def test_embedding_caching_azure_individual_items_reordered():
     embedding_val_1 = await aembedding(
         model="azure/azure-embedding-model", input=embedding_1, caching=True
     )
+    print("embedding val 1", embedding_val_1)
     embedding_val_2 = await aembedding(
         model="azure/azure-embedding-model", input=embedding_2, caching=True
     )
+    print("embedding val 2", embedding_val_2)
     print(f"embedding_val_2._hidden_params: {embedding_val_2._hidden_params}")
     assert embedding_val_2._hidden_params["cache_hit"] == True
@@ -866,7 +869,7 @@ async def test_redis_cache_cluster_init_unit_test():
     from redis.asyncio import RedisCluster as AsyncRedisCluster
     from redis.cluster import RedisCluster
-    from litellm.caching import RedisCache
+    from litellm.caching.caching import RedisCache
     litellm.set_verbose = True
@@ -900,7 +903,7 @@ async def test_redis_cache_cluster_init_with_env_vars_unit_test():
     from redis.asyncio import RedisCluster as AsyncRedisCluster
     from redis.cluster import RedisCluster
-    from litellm.caching import RedisCache
+    from litellm.caching.caching import RedisCache
     litellm.set_verbose = True
@@ -1554,7 +1557,7 @@ def test_custom_redis_cache_params():
 def test_get_cache_key():
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     try:
         print("Testing get_cache_key")
@@ -1989,7 +1992,7 @@ async def test_cache_default_off_acompletion():
     verbose_logger.setLevel(logging.DEBUG)
-    from litellm.caching import CacheMode
+    from litellm.caching.caching import CacheMode
     random_number = random.randint(
         1, 100000
@@ -2072,7 +2075,7 @@ async def test_dual_cache_uses_redis():
     - Assert that value from redis is used
     """
     litellm.set_verbose = True
-    from litellm.caching import DualCache, RedisCache
+    from litellm.caching.caching import DualCache, RedisCache
     current_usage = uuid.uuid4()
@@ -2095,7 +2098,7 @@ async def test_proxy_logging_setup():
     """
     Assert always_read_redis is True when used by internal usage cache
     """
-    from litellm.caching import DualCache
+    from litellm.caching.caching import DualCache
     from litellm.proxy.utils import ProxyLogging
     pl_obj = ProxyLogging(user_api_key_cache=DualCache())
@@ -2165,7 +2168,7 @@ async def test_redis_proxy_batch_redis_get_cache():
     - make 2nd call -> expect hit
     """
-    from litellm.caching import Cache, DualCache
+    from litellm.caching.caching import Cache, DualCache
     from litellm.proxy._types import UserAPIKeyAuth
     from litellm.proxy.hooks.batch_redis_get import _PROXY_BatchRedisRequests


@@ -15,7 +15,7 @@ sys.path.insert(
 import pytest
 import litellm
 from litellm import embedding, completion, Router
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 messages = [{"role": "user", "content": f"who is ishaan {time.time()}"}]


@@ -151,7 +151,7 @@ async def test_datadog_log_redis_failures():
     Test that poorly configured Redis is logged as Warning on DataDog
     """
     try:
-        from litellm.caching import Cache
+        from litellm.caching.caching import Cache
         from litellm.integrations.datadog.datadog import DataDogLogger
         litellm.cache = Cache(


@@ -24,7 +24,7 @@ import pytest
 from fastapi import Request
 import litellm
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable, LiteLLMRoutes
 from litellm.proxy.auth.handle_jwt import JWTHandler
 from litellm.proxy.management_endpoints.team_endpoints import new_team


@@ -89,7 +89,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     DynamoDBArgs,
     GenerateKeyRequest,
@@ -1444,7 +1444,7 @@ def test_call_with_key_over_budget(prisma_client):
     # update spend using track_cost callback, make 2nd request, it should fail
     from litellm import Choices, Message, ModelResponse, Usage
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )
@@ -1564,7 +1564,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
     setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)
     from litellm import Choices, Message, ModelResponse, Usage
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     litellm.cache = Cache()
     import time
@@ -1685,7 +1685,7 @@ def test_call_with_key_over_model_budget(prisma_client):
     # update spend using track_cost callback, make 2nd request, it should fail
     from litellm import Choices, Message, ModelResponse, Usage
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )


@@ -25,7 +25,7 @@ import pytest
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation
 from litellm.proxy.proxy_server import embeddings


@@ -20,7 +20,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 ### UNIT TESTS FOR LEAST BUSY LOGGING ###


@@ -20,7 +20,7 @@ from litellm.proxy.enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMG
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging, hash_token
 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 ### UNIT TESTS FOR LLM GUARD ###


@@ -10,7 +10,7 @@
 # import asyncio
 # from litellm import Router, Timeout
 # import time
-# from litellm.caching import Cache
+# from litellm.caching.caching import Cache
 # import litellm
 # litellm.cache = Cache(


@@ -15,7 +15,7 @@ sys.path.insert(
 import pytest
 from litellm import Router
 from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 ### UNIT TESTS FOR cost ROUTING ###


@@ -22,7 +22,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
 ### UNIT TESTS FOR LATENCY ROUTING ###


@@ -19,7 +19,7 @@
 # from litellm import Router
 # from litellm.proxy.utils import ProxyLogging, hash_token
 # from litellm.proxy._types import UserAPIKeyAuth
-# from litellm.caching import DualCache, RedisCache
+# from litellm.caching.caching import DualCache, RedisCache
 # from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
 # from datetime import datetime


@@ -22,7 +22,7 @@ from litellm.proxy.enterprise.enterprise_hooks.openai_moderation import (
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging, hash_token
 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 ### UNIT TESTS FOR OpenAI Moderation ###


@@ -23,7 +23,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler as MaxParallelRequestsHandler,


@@ -22,7 +22,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
 from litellm.proxy.utils import ProxyLogging


@@ -67,7 +67,7 @@ async def test_completion_with_caching_bad_call():
     litellm.set_verbose = True
     try:
-        from litellm.caching import RedisCache
+        from litellm.caching.caching import RedisCache
         litellm.service_callback = ["prometheus_system"]
         sl = ServiceLogging(mock_testing=True)


@@ -20,7 +20,7 @@ from litellm.proxy.hooks.prompt_injection_detection import (
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging
 from litellm.proxy._types import UserAPIKeyAuth, LiteLLMPromptInjectionParams
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 @pytest.mark.asyncio


@@ -31,7 +31,7 @@ from starlette.datastructures import URL
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (


@@ -745,7 +745,7 @@ async def test_team_update_redis():
     """
     Tests if team update, updates the redis cache if set
     """
-    from litellm.caching import DualCache, RedisCache
+    from litellm.caching.caching import DualCache, RedisCache
     from litellm.proxy._types import LiteLLM_TeamTableCachedObj
     from litellm.proxy.auth.auth_checks import _cache_team_object
@@ -775,7 +775,7 @@ async def test_get_team_redis(client_no_auth):
     """
     Tests if get_team_object gets value from redis cache, if set
     """
-    from litellm.caching import DualCache, RedisCache
+    from litellm.caching.caching import DualCache, RedisCache
     from litellm.proxy.auth.auth_checks import get_team_object
     proxy_logging_obj: ProxyLogging = getattr(


@@ -26,7 +26,7 @@ from starlette.datastructures import URL
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (


@@ -3128,7 +3128,7 @@ async def test_azure_astreaming_and_function_calling():
             "content": f"What is the weather like in Boston? {uuid.uuid4()}",
         }
     ]
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     litellm.cache = Cache(
         type="redis",


@@ -23,7 +23,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router_strategy.lowest_tpm_rpm_v2 import (
     LowestTPMLoggingHandler_v2 as LowestTPMLoggingHandler,
 )


@@ -27,7 +27,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.management_endpoints.internal_user_endpoints import (
     new_user,
@@ -53,7 +53,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     BlockUsers,
     DynamoDBArgs,


@@ -157,7 +157,7 @@ async def test_transcription_on_router():
 @pytest.mark.asyncio()
 async def test_transcription_caching():
     import litellm
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     litellm.set_verbose = True
     litellm.cache = Cache()


@@ -71,7 +71,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     DynamoDBArgs,
     GenerateKeyRequest,


@@ -78,7 +78,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import *
 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())


@@ -17,7 +17,7 @@ from litellm.proxy._types import LitellmUserRoles
 import os
 import jwt
 import time
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())


@@ -85,7 +85,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     DynamoDBArgs,
     GenerateKeyRequest,