diff --git a/litellm/caching.py b/litellm/caching.py
index a898b7d7d..2ff4f0c82 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -12,18 +12,6 @@ import time, logging
 import json, traceback, ast
 from typing import Optional
 
-def get_prompt(*args, **kwargs):
-    # make this safe checks, it should not throw any exceptions
-    if len(args) > 1:
-        messages = args[1]
-        prompt = " ".join(message["content"] for message in messages)
-        return prompt
-    if "messages" in kwargs:
-        messages = kwargs["messages"]
-        prompt = " ".join(message["content"] for message in messages)
-        return prompt
-    return None
-
 def print_verbose(print_statement):
     try:
         if litellm.set_verbose:
@@ -309,4 +297,9 @@ class Cache:
                     result = result.model_dump_json()
                 self.cache.set_cache(cache_key, result, **kwargs)
         except Exception as e:
+            print_verbose(f"LiteLLM Cache: Exception in add_cache: {str(e)}")
+            traceback.print_exc()
             pass
+
+    async def _async_add_cache(self, result, *args, **kwargs):
+        self.add_cache(result, *args, **kwargs)
\ No newline at end of file
diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index c4e3cd819..978da57d6 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -29,6 +29,7 @@ def generate_random_word(length=4):
 messages = [{"role": "user", "content": "who is ishaan 5222"}]
 def test_caching_v2(): # test in memory cache
     try:
+        litellm.set_verbose = True
         litellm.cache = Cache()
         response1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
         response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
@@ -40,7 +41,7 @@ def test_caching_v2(): # test in memory cache
         if response2['choices'][0]['message']['content'] != response1['choices'][0]['message']['content']:
             print(f"response1: {response1}")
             print(f"response2: {response2}")
-            pytest.fail(f"Error occurred: {e}")
+            pytest.fail("Error occurred: cached responses are not equal")
     except Exception as e:
         print(f"error occurred: {traceback.format_exc()}")
         pytest.fail(f"Error occurred: {e}")
diff --git a/litellm/utils.py b/litellm/utils.py
index 3ff1ce4b1..af3b9f447 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1682,9 +1682,9 @@ def client(original_function):
             # [OPTIONAL] ADD TO CACHE
             if litellm.caching or litellm.caching_with_models or litellm.cache != None: # user init a cache object
                 if isinstance(result, litellm.ModelResponse) or isinstance(result, litellm.EmbeddingResponse):
-                    litellm.cache.add_cache(result.json(), *args, **kwargs)
+                    asyncio.create_task(litellm.cache._async_add_cache(result.json(), *args, **kwargs))
                 else:
-                    litellm.cache.add_cache(result, *args, **kwargs)
+                    asyncio.create_task(litellm.cache._async_add_cache(result, *args, **kwargs))
             # LOG SUCCESS - handle streaming success logging in the _next_ object
             print_verbose(f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}")
             asyncio.create_task(logging_obj.async_success_handler(result, start_time, end_time))
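
The utils.py hunk moves cache writes off the response path: instead of calling `add_cache` inline, it wraps the synchronous write in `_async_add_cache` and schedules it with `asyncio.create_task`. Below is a minimal, self-contained sketch of that fire-and-forget pattern; `DemoCache` and `fake_completion` are hypothetical stand-ins for illustration, not litellm's actual classes or API.

```python
import asyncio


class DemoCache:
    """Hypothetical in-memory cache standing in for a cache object."""

    def __init__(self):
        self._store = {}

    def add_cache(self, key, result):
        # Synchronous write, analogous to Cache.add_cache in the diff above.
        self._store[key] = result

    async def _async_add_cache(self, key, result):
        # Thin async wrapper so the write can be handed to asyncio.create_task.
        # The body is still synchronous, so it executes on the event loop
        # when the scheduled task runs.
        self.add_cache(key, result)


async def fake_completion(cache: DemoCache, prompt: str) -> str:
    """Hypothetical stand-in for an async client wrapper."""
    result = f"response to: {prompt}"
    # Fire-and-forget: schedule the cache write without awaiting it,
    # so the caller gets the result back without waiting on the cache.
    asyncio.create_task(cache._async_add_cache(prompt, result))
    return result


async def main():
    cache = DemoCache()
    print(await fake_completion(cache, "who is ishaan 5222"))
    # Yield control once so the pending cache-write task gets a chance to run.
    await asyncio.sleep(0)
    print(cache._store)


if __name__ == "__main__":
    asyncio.run(main())
```

One trade-off to note: because `_async_add_cache` simply calls the synchronous `add_cache`, the write still runs on the event loop when the task executes; it is deferred, not offloaded. If the cache backend can block (for example a network round trip over a blocking client), something like `asyncio.to_thread` would be needed to keep the loop responsive.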