(refactor) sync caching - use LLMCachingHandler class for get_cache (#6249)

* caching - use _sync_set_cache

* add sync _sync_add_streaming_response_to_cache

* use caching class for cache storage

* fix: use _sync_get_cache

* fix circular import

* use _update_litellm_logging_obj_environment

* use one helper for _process_async_embedding_cached_response

* fix _is_call_type_supported_by_cache

* fix checking cache

* fix sync get cache

* fix: use _combine_cached_embedding_response_with_api_result

* fix _update_litellm_logging_obj_environment

* adjust test_redis_cache_acompletion_stream_bedrock
This commit is contained in:
Ishaan Jaff 2024-10-16 12:33:49 +05:30 committed by GitHub
parent 183bd5d873
commit 97ba4eea7d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 434 additions and 294 deletions

View file

@@ -1067,7 +1067,7 @@ async def test_redis_cache_acompletion_stream_bedrock():
response_1_content += chunk.choices[0].delta.content or ""
print(response_1_content)
time.sleep(0.5)
await asyncio.sleep(1)
print("\n\n Response 1 content: ", response_1_content, "\n\n")
response2 = await litellm.acompletion(
@@ -1082,8 +1082,8 @@ async def test_redis_cache_acompletion_stream_bedrock():
response_2_content += chunk.choices[0].delta.content or ""
print(response_2_content)
print("\nresponse 1", response_1_content)
print("\nresponse 2", response_2_content)
print("\nfinal response 1", response_1_content)
print("\nfinal response 2", response_2_content)
assert (
response_1_content == response_2_content
), f"Response 1 != Response 2. Same params, Response 1{response_1_content} != Response 2{response_2_content}"