forked from phoenix/litellm-mirror
fix(utils.py): enable streaming cache logging
This commit is contained in:
parent
b011c8b93a
commit
2d62dee712
2 changed files with 12 additions and 7 deletions
|
@@ -797,6 +797,8 @@ async def test_async_completion_azure_caching():
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_completion_azure_caching_streaming():
|
||||
import copy
|
||||
|
||||
litellm.set_verbose = True
|
||||
customHandler_caching = CompletionCustomHandler()
|
||||
litellm.cache = Cache(
|
||||
|
@@ -816,8 +818,9 @@ async def test_async_completion_azure_caching_streaming():
|
|||
stream=True,
|
||||
)
|
||||
async for chunk in response1:
|
||||
continue
|
||||
print(f"chunk in response1: {chunk}")
|
||||
await asyncio.sleep(1)
|
||||
initial_customhandler_caching_states = len(customHandler_caching.states)
|
||||
print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
|
||||
response2 = await litellm.acompletion(
|
||||
model="azure/chatgpt-v-2",
|
||||
|
@@ -828,14 +831,14 @@ async def test_async_completion_azure_caching_streaming():
|
|||
stream=True,
|
||||
)
|
||||
async for chunk in response2:
|
||||
continue
|
||||
print(f"chunk in response2: {chunk}")
|
||||
await asyncio.sleep(1) # success callbacks are done in parallel
|
||||
print(
|
||||
f"customHandler_caching.states post-cache hit: {customHandler_caching.states}"
|
||||
)
|
||||
assert len(customHandler_caching.errors) == 0
|
||||
assert (
|
||||
len(customHandler_caching.states) == 4
|
||||
len(customHandler_caching.states) > initial_customhandler_caching_states
|
||||
) # pre, post, streaming .., success, success
|
||||
|
||||
|
||||
|
|
|
@@ -1411,7 +1411,7 @@ class Logging:
|
|||
print_verbose(
|
||||
f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
|
||||
)
|
||||
return
|
||||
pass
|
||||
else:
|
||||
print_verbose(
|
||||
"success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
|
||||
|
@@ -1616,7 +1616,7 @@ class Logging:
|
|||
print_verbose(
|
||||
f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
|
||||
)
|
||||
return
|
||||
pass
|
||||
else:
|
||||
print_verbose(
|
||||
"async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
|
||||
|
@@ -1625,8 +1625,10 @@ class Logging:
|
|||
# only add to cache once we have a complete streaming response
|
||||
litellm.cache.add_cache(result, **kwargs)
|
||||
if isinstance(callback, CustomLogger): # custom logger class
|
||||
print_verbose(f"Async success callbacks: {callback}")
|
||||
if self.stream:
|
||||
print_verbose(
|
||||
f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}"
|
||||
)
|
||||
if self.stream == True:
|
||||
if "complete_streaming_response" in self.model_call_details:
|
||||
await callback.async_log_success_event(
|
||||
kwargs=self.model_call_details,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue