(refactor) sync caching - use LLMCachingHandler class for get_cache (#6249)

* caching - use _sync_set_cache

* add sync _sync_add_streaming_response_to_cache

* use caching class for cache storage

* fix: use _sync_get_cache

* fix circular import

* use _update_litellm_logging_obj_environment

* use one helper for _process_async_embedding_cached_response

* fix _is_call_type_supported_by_cache

* fix checking cache

* fix sync get cache

* fix: use _combine_cached_embedding_response_with_api_result

* fix _update_litellm_logging_obj_environment

* adjust test_redis_cache_acompletion_stream_bedrock
This commit is contained in:
Ishaan Jaff 2024-10-16 12:33:49 +05:30 committed by GitHub
parent 183bd5d873
commit 97ba4eea7d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 434 additions and 294 deletions

View file

@@ -1067,7 +1067,7 @@ async def test_redis_cache_acompletion_stream_bedrock():
response_1_content += chunk.choices[0].delta.content or ""
print(response_1_content)
time.sleep(0.5)
await asyncio.sleep(1)
print("\n\n Response 1 content: ", response_1_content, "\n\n")
response2 = await litellm.acompletion(
@@ -1082,8 +1082,8 @@ async def test_redis_cache_acompletion_stream_bedrock():
response_2_content += chunk.choices[0].delta.content or ""
print(response_2_content)
print("\nresponse 1", response_1_content)
print("\nresponse 2", response_2_content)
print("\nfinal response 1", response_1_content)
print("\nfinal response 2", response_2_content)
assert (
response_1_content == response_2_content
), f"Response 1 != Response 2. Same params, Response 1{response_1_content} != Response 2{response_2_content}"