forked from phoenix/litellm-mirror
(refactor caching) use LLMCachingHandler for caching streaming responses (#6210)
* use folder for caching
* fix caching imports
* fix clickhouse pyright
* fix linting
* correctly pass kwargs and args
* fix test case for embedding
* fix linting
* fix embedding caching logic
* refactor cache handling in utils.py
* refactor async set stream cache
* fix linting
This commit is contained in:
parent 78f3228e17
commit d1bef4ad81

3 changed files with 75 additions and 37 deletions (only the hunks touching the Logging class are shown below)
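What the refactor amounts to: the "cache the finished stream" logic that used to live inline in Logging's async success callback (deleted in the last hunk below) moves behind an LLMCachingHandler instance that Logging now owns. A minimal sketch of that shape, assuming hypothetical method names — only the class name LLMCachingHandler and the _llm_caching_handler attribute actually appear in this diff; async_set_cache and async_success_handler's signature here are illustrative:

# Illustrative sketch only: LLMCachingHandler and _llm_caching_handler are
# real names from this diff; the methods below are hypothetical stand-ins.
from typing import Any, Optional


class LLMCachingHandler:
    """Collects the cache-write logic formerly inlined in Logging."""

    async def async_set_cache(self, result: Any, kwargs: dict) -> None:
        ...  # decide on a backend, then write the completed response


class Logging:
    def __init__(self) -> None:
        # starts as None; built only if a cache write is actually needed
        self._llm_caching_handler: Optional[LLMCachingHandler] = None

    async def async_success_handler(self, result: Any, **kwargs: Any) -> None:
        # one delegated call replaces ~30 lines of inline branching
        if self._llm_caching_handler is None:
            self._llm_caching_handler = LLMCachingHandler()
        await self._llm_caching_handler.async_set_cache(result, kwargs)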
@@ -24,6 +24,7 @@ from litellm import (
     verbose_logger,
 )
 from litellm.caching.caching import DualCache, InMemoryCache, S3Cache
+from litellm.caching.caching_handler import LLMCachingHandler
 from litellm.cost_calculator import _select_model_name_for_cost_calc
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger
@@ -271,6 +272,7 @@ class Logging:
 
         ## TIME TO FIRST TOKEN LOGGING ##
         self.completion_start_time: Optional[datetime.datetime] = None
+        self._llm_caching_handler: Optional[LLMCachingHandler] = None
 
     def process_dynamic_callbacks(self):
         """
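For context on the surrounding lines: completion_start_time backs the "TIME TO FIRST TOKEN LOGGING" noted in the hunk above — it is stamped when the first streamed chunk arrives, so completion_start_time minus the request's start time approximates time to first token. A self-contained sketch of that pattern (the fake stream and variable names other than completion_start_time are ours, not litellm's):

import asyncio
import datetime


async def fake_stream():
    # stand-in for a streamed LLM response
    for chunk in ("Hel", "lo", "!"):
        await asyncio.sleep(0.05)
        yield chunk


async def main() -> None:
    start_time = datetime.datetime.now()
    completion_start_time = None  # mirrors Logging.completion_start_time

    async for chunk in fake_stream():
        if completion_start_time is None:
            # stamped exactly once, on the first chunk
            completion_start_time = datetime.datetime.now()
            print("time to first token:", completion_start_time - start_time)
        print("chunk:", chunk)


asyncio.run(main())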
@@ -1625,35 +1627,6 @@
             if kwargs.get("no-log", False) is True:
                 print_verbose("no-log request, skipping logging")
                 continue
-            if (
-                callback == "cache"
-                and litellm.cache is not None
-                and self.model_call_details.get("litellm_params", {}).get(
-                    "acompletion", False
-                )
-                is True
-            ):
-                # set_cache once complete streaming response is built
-                print_verbose("async success_callback: reaches cache for logging!")
-                kwargs = self.model_call_details
-                if self.stream:
-                    if "async_complete_streaming_response" not in kwargs:
-                        print_verbose(
-                            f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. Kwargs={kwargs}\n\n"
-                        )
-                        pass
-                    else:
-                        print_verbose(
-                            "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache"
-                        )
-                        result = kwargs["async_complete_streaming_response"]
-                        # only add to cache once we have a complete streaming response
-                        if litellm.cache is not None and not isinstance(
-                            litellm.cache.cache, S3Cache
-                        ):
-                            await litellm.cache.async_add_cache(result, **kwargs)
-                        else:
-                            litellm.cache.add_cache(result, **kwargs)
             if callback == "openmeter" and openMeterLogger is not None:
                 if self.stream is True:
                     if (
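The deleted branch above is the behavior that moves into the handler: fire only for async completions with caching enabled, wait until async_complete_streaming_response has been assembled, then prefer the async write path, with the S3 backend falling through to the synchronous add_cache. Restated as a standalone coroutine for readability — the function name is ours; S3Cache, async_add_cache, and add_cache are taken verbatim from the deleted lines:

import litellm
from litellm.caching.caching import S3Cache


async def set_stream_cache(model_call_details: dict) -> None:
    """Re-expresses the deleted branch: cache the completed stream, if any."""
    if litellm.cache is None:
        return
    # only add to cache once the complete streaming response is built
    result = model_call_details.get("async_complete_streaming_response")
    if result is None:
        return
    if not isinstance(litellm.cache.cache, S3Cache):
        await litellm.cache.async_add_cache(result, **model_call_details)
    else:
        # the original code sent the S3 backend down the sync path
        litellm.cache.add_cache(result, **model_call_details)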