fix(utils.py): enable streaming cache logging

This commit is contained in:
Krrish Dholakia 2024-02-21 21:10:58 -08:00
parent b011c8b93a
commit 2d62dee712
2 changed files with 12 additions and 7 deletions

View file

@@ -797,6 +797,8 @@ async def test_async_completion_azure_caching():
@pytest.mark.asyncio
async def test_async_completion_azure_caching_streaming():
import copy
litellm.set_verbose = True
customHandler_caching = CompletionCustomHandler()
litellm.cache = Cache(
@@ -816,8 +818,9 @@ async def test_async_completion_azure_caching_streaming():
stream=True,
)
async for chunk in response1:
continue
print(f"chunk in response1: {chunk}")
await asyncio.sleep(1)
initial_customhandler_caching_states = len(customHandler_caching.states)
print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
response2 = await litellm.acompletion(
model="azure/chatgpt-v-2",
@@ -828,14 +831,14 @@ async def test_async_completion_azure_caching_streaming():
stream=True,
)
async for chunk in response2:
continue
print(f"chunk in response2: {chunk}")
await asyncio.sleep(1) # success callbacks are done in parallel
print(
f"customHandler_caching.states post-cache hit: {customHandler_caching.states}"
)
assert len(customHandler_caching.errors) == 0
assert (
len(customHandler_caching.states) == 4
len(customHandler_caching.states) > initial_customhandler_caching_states
) # pre, post, streaming .., success, success

View file

@@ -1411,7 +1411,7 @@ class Logging:
print_verbose(
f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
)
return
pass
else:
print_verbose(
"success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
@@ -1616,7 +1616,7 @@ class Logging:
print_verbose(
f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
)
return
pass
else:
print_verbose(
"async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
@@ -1625,8 +1625,10 @@ class Logging:
# only add to cache once we have a complete streaming response
litellm.cache.add_cache(result, **kwargs)
if isinstance(callback, CustomLogger): # custom logger class
print_verbose(f"Async success callbacks: {callback}")
if self.stream:
print_verbose(
f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}"
)
if self.stream == True:
if "complete_streaming_response" in self.model_call_details:
await callback.async_log_success_event(
kwargs=self.model_call_details,