forked from phoenix/litellm-mirror
fix(utils.py): enable streaming cache logging
This commit is contained in:
parent
b011c8b93a
commit
2d62dee712
2 changed files with 12 additions and 7 deletions
|
@@ -797,6 +797,8 @@ async def test_async_completion_azure_caching():
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_async_completion_azure_caching_streaming():
|
async def test_async_completion_azure_caching_streaming():
|
||||||
|
import copy
|
||||||
|
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
customHandler_caching = CompletionCustomHandler()
|
customHandler_caching = CompletionCustomHandler()
|
||||||
litellm.cache = Cache(
|
litellm.cache = Cache(
|
||||||
|
@@ -816,8 +818,9 @@ async def test_async_completion_azure_caching_streaming():
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
async for chunk in response1:
|
async for chunk in response1:
|
||||||
continue
|
print(f"chunk in response1: {chunk}")
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
|
initial_customhandler_caching_states = len(customHandler_caching.states)
|
||||||
print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
|
print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
|
||||||
response2 = await litellm.acompletion(
|
response2 = await litellm.acompletion(
|
||||||
model="azure/chatgpt-v-2",
|
model="azure/chatgpt-v-2",
|
||||||
|
@@ -828,14 +831,14 @@ async def test_async_completion_azure_caching_streaming():
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
async for chunk in response2:
|
async for chunk in response2:
|
||||||
continue
|
print(f"chunk in response2: {chunk}")
|
||||||
await asyncio.sleep(1) # success callbacks are done in parallel
|
await asyncio.sleep(1) # success callbacks are done in parallel
|
||||||
print(
|
print(
|
||||||
f"customHandler_caching.states post-cache hit: {customHandler_caching.states}"
|
f"customHandler_caching.states post-cache hit: {customHandler_caching.states}"
|
||||||
)
|
)
|
||||||
assert len(customHandler_caching.errors) == 0
|
assert len(customHandler_caching.errors) == 0
|
||||||
assert (
|
assert (
|
||||||
len(customHandler_caching.states) == 4
|
len(customHandler_caching.states) > initial_customhandler_caching_states
|
||||||
) # pre, post, streaming .., success, success
|
) # pre, post, streaming .., success, success
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -1411,7 +1411,7 @@ class Logging:
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
|
f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
|
||||||
)
|
)
|
||||||
return
|
pass
|
||||||
else:
|
else:
|
||||||
print_verbose(
|
print_verbose(
|
||||||
"success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
|
"success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
|
||||||
|
@@ -1616,7 +1616,7 @@ class Logging:
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
|
f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
|
||||||
)
|
)
|
||||||
return
|
pass
|
||||||
else:
|
else:
|
||||||
print_verbose(
|
print_verbose(
|
||||||
"async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
|
"async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
|
||||||
|
@@ -1625,8 +1625,10 @@ class Logging:
|
||||||
# only add to cache once we have a complete streaming response
|
# only add to cache once we have a complete streaming response
|
||||||
litellm.cache.add_cache(result, **kwargs)
|
litellm.cache.add_cache(result, **kwargs)
|
||||||
if isinstance(callback, CustomLogger): # custom logger class
|
if isinstance(callback, CustomLogger): # custom logger class
|
||||||
print_verbose(f"Async success callbacks: {callback}")
|
print_verbose(
|
||||||
if self.stream:
|
f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}"
|
||||||
|
)
|
||||||
|
if self.stream == True:
|
||||||
if "complete_streaming_response" in self.model_call_details:
|
if "complete_streaming_response" in self.model_call_details:
|
||||||
await callback.async_log_success_event(
|
await callback.async_log_success_event(
|
||||||
kwargs=self.model_call_details,
|
kwargs=self.model_call_details,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue