forked from phoenix/litellm-mirror
(refactor) caching use LLMCachingHandler for async_get_cache and set_cache (#6208)
* use folder for caching
* fix importing caching
* fix clickhouse pyright
* fix linting
* fix correctly pass kwargs and args
* fix test case for embedding
* fix linting
* fix embedding caching logic
* fix refactor handle utils.py
* fix test_embedding_caching_azure_individual_items_reordered
This commit is contained in:
parent
20e50d7002
commit
4d1b4beb3d
96 changed files with 690 additions and 489 deletions
|
@ -19,7 +19,7 @@ from litellm._logging import verbose_proxy_logger
|
|||
from litellm.proxy.utils import PrismaClient, ProxyLogging
|
||||
|
||||
verbose_proxy_logger.setLevel(level=logging.DEBUG)
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.router import (
|
||||
Deployment,
|
||||
updateDeployment,
|
||||
|
|
|
@ -28,7 +28,7 @@ import pytest
|
|||
from openai import APIError
|
||||
|
||||
import litellm
|
||||
from litellm.caching import DualCache, RedisCache
|
||||
from litellm.caching.caching import DualCache, RedisCache
|
||||
from litellm.integrations.SlackAlerting.slack_alerting import (
|
||||
DeploymentMetrics,
|
||||
SlackAlerting,
|
||||
|
|
|
@ -13,7 +13,7 @@ sys.path.insert(
|
|||
) # Adds the parent directory to the system path
|
||||
import pytest, litellm
|
||||
from litellm.proxy.auth.auth_checks import get_end_user_object
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import LiteLLM_EndUserTable, LiteLLM_BudgetTable
|
||||
from litellm.proxy.utils import PrismaClient
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ import pytest
|
|||
|
||||
import litellm
|
||||
from litellm import Router, mock_completion
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.utils import ProxyLogging
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ from litellm.proxy.enterprise.enterprise_hooks.banned_keywords import (
|
|||
from litellm import Router, mock_completion
|
||||
from litellm.proxy.utils import ProxyLogging, hash_token
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
|
@ -27,7 +27,7 @@ import pytest
|
|||
import litellm
|
||||
from litellm import Router, mock_completion
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.enterprise.enterprise_hooks.blocked_user_list import (
|
||||
_ENTERPRISE_BlockedUserList,
|
||||
|
@ -56,7 +56,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
|
|||
|
||||
from starlette.datastructures import URL
|
||||
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import (
|
||||
BlockUsers,
|
||||
DynamoDBArgs,
|
||||
|
|
|
@ -21,7 +21,7 @@ import pytest
|
|||
|
||||
import litellm
|
||||
from litellm import aembedding, completion, embedding
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
from unittest.mock import AsyncMock, patch, MagicMock
|
||||
import datetime
|
||||
|
@ -52,7 +52,7 @@ async def test_dual_cache_async_batch_get_cache():
|
|||
- hit redis for the other -> expect to return None
|
||||
- expect result = [in_memory_result, None]
|
||||
"""
|
||||
from litellm.caching import DualCache, InMemoryCache, RedisCache
|
||||
from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
|
||||
|
||||
in_memory_cache = InMemoryCache()
|
||||
redis_cache = RedisCache() # get credentials from environment
|
||||
|
@ -74,7 +74,7 @@ def test_dual_cache_batch_get_cache():
|
|||
- hit redis for the other -> expect to return None
|
||||
- expect result = [in_memory_result, None]
|
||||
"""
|
||||
from litellm.caching import DualCache, InMemoryCache, RedisCache
|
||||
from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
|
||||
|
||||
in_memory_cache = InMemoryCache()
|
||||
redis_cache = RedisCache() # get credentials from environment
|
||||
|
@ -520,6 +520,7 @@ async def test_embedding_caching_azure_individual_items_reordered():
|
|||
assert embedding_val_1[0]["id"] == embedding_val_2[0]["id"]
|
||||
```
|
||||
"""
|
||||
litellm.set_verbose = True
|
||||
litellm.cache = Cache()
|
||||
common_msg = f"{uuid.uuid4()}"
|
||||
common_msg_2 = f"hey how's it going {uuid.uuid4()}"
|
||||
|
@ -532,9 +533,11 @@ async def test_embedding_caching_azure_individual_items_reordered():
|
|||
embedding_val_1 = await aembedding(
|
||||
model="azure/azure-embedding-model", input=embedding_1, caching=True
|
||||
)
|
||||
print("embedding val 1", embedding_val_1)
|
||||
embedding_val_2 = await aembedding(
|
||||
model="azure/azure-embedding-model", input=embedding_2, caching=True
|
||||
)
|
||||
print("embedding val 2", embedding_val_2)
|
||||
print(f"embedding_val_2._hidden_params: {embedding_val_2._hidden_params}")
|
||||
assert embedding_val_2._hidden_params["cache_hit"] == True
|
||||
|
||||
|
@ -866,7 +869,7 @@ async def test_redis_cache_cluster_init_unit_test():
|
|||
from redis.asyncio import RedisCluster as AsyncRedisCluster
|
||||
from redis.cluster import RedisCluster
|
||||
|
||||
from litellm.caching import RedisCache
|
||||
from litellm.caching.caching import RedisCache
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
|
@ -900,7 +903,7 @@ async def test_redis_cache_cluster_init_with_env_vars_unit_test():
|
|||
from redis.asyncio import RedisCluster as AsyncRedisCluster
|
||||
from redis.cluster import RedisCluster
|
||||
|
||||
from litellm.caching import RedisCache
|
||||
from litellm.caching.caching import RedisCache
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
|
@ -1554,7 +1557,7 @@ def test_custom_redis_cache_params():
|
|||
|
||||
|
||||
def test_get_cache_key():
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
try:
|
||||
print("Testing get_cache_key")
|
||||
|
@ -1989,7 +1992,7 @@ async def test_cache_default_off_acompletion():
|
|||
|
||||
verbose_logger.setLevel(logging.DEBUG)
|
||||
|
||||
from litellm.caching import CacheMode
|
||||
from litellm.caching.caching import CacheMode
|
||||
|
||||
random_number = random.randint(
|
||||
1, 100000
|
||||
|
@ -2072,7 +2075,7 @@ async def test_dual_cache_uses_redis():
|
|||
- Assert that value from redis is used
|
||||
"""
|
||||
litellm.set_verbose = True
|
||||
from litellm.caching import DualCache, RedisCache
|
||||
from litellm.caching.caching import DualCache, RedisCache
|
||||
|
||||
current_usage = uuid.uuid4()
|
||||
|
||||
|
@ -2095,7 +2098,7 @@ async def test_proxy_logging_setup():
|
|||
"""
|
||||
Assert always_read_redis is True when used by internal usage cache
|
||||
"""
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy.utils import ProxyLogging
|
||||
|
||||
pl_obj = ProxyLogging(user_api_key_cache=DualCache())
|
||||
|
@ -2165,7 +2168,7 @@ async def test_redis_proxy_batch_redis_get_cache():
|
|||
- make 2nd call -> expect hit
|
||||
"""
|
||||
|
||||
from litellm.caching import Cache, DualCache
|
||||
from litellm.caching.caching import Cache, DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.hooks.batch_redis_get import _PROXY_BatchRedisRequests
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ sys.path.insert(
|
|||
import pytest
|
||||
import litellm
|
||||
from litellm import embedding, completion, Router
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
messages = [{"role": "user", "content": f"who is ishaan {time.time()}"}]
|
||||
|
||||
|
|
|
@ -151,7 +151,7 @@ async def test_datadog_log_redis_failures():
|
|||
Test that poorly configured Redis is logged as Warning on DataDog
|
||||
"""
|
||||
try:
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
from litellm.integrations.datadog.datadog import DataDogLogger
|
||||
|
||||
litellm.cache = Cache(
|
||||
|
|
|
@ -24,7 +24,7 @@ import pytest
|
|||
from fastapi import Request
|
||||
|
||||
import litellm
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable, LiteLLMRoutes
|
||||
from litellm.proxy.auth.handle_jwt import JWTHandler
|
||||
from litellm.proxy.management_endpoints.team_endpoints import new_team
|
||||
|
|
|
@ -89,7 +89,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
|
|||
|
||||
from starlette.datastructures import URL
|
||||
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import (
|
||||
DynamoDBArgs,
|
||||
GenerateKeyRequest,
|
||||
|
@ -1444,7 +1444,7 @@ def test_call_with_key_over_budget(prisma_client):
|
|||
|
||||
# update spend using track_cost callback, make 2nd request, it should fail
|
||||
from litellm import Choices, Message, ModelResponse, Usage
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
from litellm.proxy.proxy_server import (
|
||||
_PROXY_track_cost_callback as track_cost_callback,
|
||||
)
|
||||
|
@ -1564,7 +1564,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
|
|||
setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)
|
||||
|
||||
from litellm import Choices, Message, ModelResponse, Usage
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
litellm.cache = Cache()
|
||||
import time
|
||||
|
@ -1685,7 +1685,7 @@ def test_call_with_key_over_model_budget(prisma_client):
|
|||
|
||||
# update spend using track_cost callback, make 2nd request, it should fail
|
||||
from litellm import Choices, Message, ModelResponse, Usage
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
from litellm.proxy.proxy_server import (
|
||||
_PROXY_track_cost_callback as track_cost_callback,
|
||||
)
|
||||
|
|
|
@ -25,7 +25,7 @@ import pytest
|
|||
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation
|
||||
from litellm.proxy.proxy_server import embeddings
|
||||
|
|
|
@ -20,7 +20,7 @@ import pytest
|
|||
|
||||
import litellm
|
||||
from litellm import Router
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
|
||||
|
||||
### UNIT TESTS FOR LEAST BUSY LOGGING ###
|
||||
|
|
|
@ -20,7 +20,7 @@ from litellm.proxy.enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMG
|
|||
from litellm import Router, mock_completion
|
||||
from litellm.proxy.utils import ProxyLogging, hash_token
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
|
||||
### UNIT TESTS FOR LLM GUARD ###
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
# import asyncio
|
||||
# from litellm import Router, Timeout
|
||||
# import time
|
||||
# from litellm.caching import Cache
|
||||
# from litellm.caching.caching import Cache
|
||||
# import litellm
|
||||
|
||||
# litellm.cache = Cache(
|
||||
|
|
|
@ -15,7 +15,7 @@ sys.path.insert(
|
|||
import pytest
|
||||
from litellm import Router
|
||||
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
|
||||
### UNIT TESTS FOR cost ROUTING ###
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import pytest
|
|||
|
||||
import litellm
|
||||
from litellm import Router
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
|
||||
|
||||
### UNIT TESTS FOR LATENCY ROUTING ###
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
# from litellm import Router
|
||||
# from litellm.proxy.utils import ProxyLogging, hash_token
|
||||
# from litellm.proxy._types import UserAPIKeyAuth
|
||||
# from litellm.caching import DualCache, RedisCache
|
||||
# from litellm.caching.caching import DualCache, RedisCache
|
||||
# from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
|
||||
# from datetime import datetime
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ from litellm.proxy.enterprise.enterprise_hooks.openai_moderation import (
|
|||
from litellm import Router, mock_completion
|
||||
from litellm.proxy.utils import ProxyLogging, hash_token
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
|
||||
### UNIT TESTS FOR OpenAI Moderation ###
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ import pytest
|
|||
|
||||
import litellm
|
||||
from litellm import Router
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.hooks.parallel_request_limiter import (
|
||||
_PROXY_MaxParallelRequestsHandler as MaxParallelRequestsHandler,
|
||||
|
|
|
@ -22,7 +22,7 @@ import pytest
|
|||
|
||||
import litellm
|
||||
from litellm import Router, mock_completion
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
|
||||
from litellm.proxy.utils import ProxyLogging
|
||||
|
|
|
@ -67,7 +67,7 @@ async def test_completion_with_caching_bad_call():
|
|||
litellm.set_verbose = True
|
||||
|
||||
try:
|
||||
from litellm.caching import RedisCache
|
||||
from litellm.caching.caching import RedisCache
|
||||
|
||||
litellm.service_callback = ["prometheus_system"]
|
||||
sl = ServiceLogging(mock_testing=True)
|
||||
|
|
|
@ -20,7 +20,7 @@ from litellm.proxy.hooks.prompt_injection_detection import (
|
|||
from litellm import Router, mock_completion
|
||||
from litellm.proxy.utils import ProxyLogging
|
||||
from litellm.proxy._types import UserAPIKeyAuth, LiteLLMPromptInjectionParams
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
|
@ -31,7 +31,7 @@ from starlette.datastructures import URL
|
|||
|
||||
import litellm
|
||||
from litellm import Router, mock_completion
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
|
||||
|
|
|
@ -745,7 +745,7 @@ async def test_team_update_redis():
|
|||
"""
|
||||
Tests if team update, updates the redis cache if set
|
||||
"""
|
||||
from litellm.caching import DualCache, RedisCache
|
||||
from litellm.caching.caching import DualCache, RedisCache
|
||||
from litellm.proxy._types import LiteLLM_TeamTableCachedObj
|
||||
from litellm.proxy.auth.auth_checks import _cache_team_object
|
||||
|
||||
|
@ -775,7 +775,7 @@ async def test_get_team_redis(client_no_auth):
|
|||
"""
|
||||
Tests if get_team_object gets value from redis cache, if set
|
||||
"""
|
||||
from litellm.caching import DualCache, RedisCache
|
||||
from litellm.caching.caching import DualCache, RedisCache
|
||||
from litellm.proxy.auth.auth_checks import get_team_object
|
||||
|
||||
proxy_logging_obj: ProxyLogging = getattr(
|
||||
|
|
|
@ -26,7 +26,7 @@ from starlette.datastructures import URL
|
|||
|
||||
import litellm
|
||||
from litellm import Router, mock_completion
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
|
||||
|
|
|
@ -3128,7 +3128,7 @@ async def test_azure_astreaming_and_function_calling():
|
|||
"content": f"What is the weather like in Boston? {uuid.uuid4()}",
|
||||
}
|
||||
]
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
litellm.cache = Cache(
|
||||
type="redis",
|
||||
|
|
|
@ -23,7 +23,7 @@ import pytest
|
|||
|
||||
import litellm
|
||||
from litellm import Router
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.router_strategy.lowest_tpm_rpm_v2 import (
|
||||
LowestTPMLoggingHandler_v2 as LowestTPMLoggingHandler,
|
||||
)
|
||||
|
|
|
@ -27,7 +27,7 @@ import pytest
|
|||
import litellm
|
||||
from litellm import Router, mock_completion
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.management_endpoints.internal_user_endpoints import (
|
||||
new_user,
|
||||
|
@ -53,7 +53,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
|
|||
|
||||
from starlette.datastructures import URL
|
||||
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import (
|
||||
BlockUsers,
|
||||
DynamoDBArgs,
|
||||
|
|
|
@ -157,7 +157,7 @@ async def test_transcription_on_router():
|
|||
@pytest.mark.asyncio()
|
||||
async def test_transcription_caching():
|
||||
import litellm
|
||||
from litellm.caching import Cache
|
||||
from litellm.caching.caching import Cache
|
||||
|
||||
litellm.set_verbose = True
|
||||
litellm.cache = Cache()
|
||||
|
|
|
@ -71,7 +71,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
|
|||
|
||||
from starlette.datastructures import URL
|
||||
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import (
|
||||
DynamoDBArgs,
|
||||
GenerateKeyRequest,
|
||||
|
|
|
@ -78,7 +78,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
|
|||
|
||||
from starlette.datastructures import URL
|
||||
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import *
|
||||
|
||||
proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
|
||||
|
|
|
@ -17,7 +17,7 @@ from litellm.proxy._types import LitellmUserRoles
|
|||
import os
|
||||
import jwt
|
||||
import time
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
|
||||
proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
|
|||
|
||||
from starlette.datastructures import URL
|
||||
|
||||
from litellm.caching import DualCache
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import (
|
||||
DynamoDBArgs,
|
||||
GenerateKeyRequest,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue