test(test_caching.py): fix async tests

This commit is contained in:
Krrish Dholakia 2024-03-15 18:09:25 -07:00
parent 8a20ea795b
commit 3072137739
2 changed files with 81 additions and 95 deletions

View file

@@ -150,6 +150,9 @@ class RedisCache(BaseCache):
await redis_client.set( await redis_client.set(
name=key, value=json.dumps(value), ex=ttl, get=True name=key, value=json.dumps(value), ex=ttl, get=True
) )
print_verbose(
f"Successfully Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
)
except Exception as e: except Exception as e:
# NON blocking - notify users Redis is throwing an exception # NON blocking - notify users Redis is throwing an exception
print_verbose( print_verbose(
@@ -216,7 +219,7 @@ class RedisCache(BaseCache):
_redis_client = self.init_async_client() _redis_client = self.init_async_client()
async with _redis_client as redis_client: async with _redis_client as redis_client:
try: try:
print_verbose(f"Get Redis Cache: key: {key}") print_verbose(f"Get Async Redis Cache: key: {key}")
cached_response = await redis_client.get(key) cached_response = await redis_client.get(key)
print_verbose( print_verbose(
f"Got Async Redis Cache: key: {key}, cached_response {cached_response}" f"Got Async Redis Cache: key: {key}, cached_response {cached_response}"
@@ -225,8 +228,9 @@ class RedisCache(BaseCache):
return response return response
except Exception as e: except Exception as e:
# NON blocking - notify users Redis is throwing an exception # NON blocking - notify users Redis is throwing an exception
traceback.print_exc() print_verbose(
logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e) f"LiteLLM Caching: async get() - Got exception from REDIS: {str(e)}"
)
async def async_get_cache_pipeline(self, key_list) -> dict: async def async_get_cache_pipeline(self, key_list) -> dict:
""" """

View file

@@ -474,11 +474,10 @@ def test_redis_cache_completion_stream():
# test_redis_cache_completion_stream() # test_redis_cache_completion_stream()
def test_redis_cache_acompletion_stream(): @pytest.mark.asyncio
import asyncio async def test_redis_cache_acompletion_stream():
try: try:
litellm.set_verbose = False litellm.set_verbose = True
random_word = generate_random_word() random_word = generate_random_word()
messages = [ messages = [
{ {
@@ -496,37 +495,31 @@ def test_redis_cache_acompletion_stream():
response_1_content = "" response_1_content = ""
response_2_content = "" response_2_content = ""
async def call1(): response1 = await litellm.acompletion(
nonlocal response_1_content model="gpt-3.5-turbo",
response1 = await litellm.acompletion( messages=messages,
model="gpt-3.5-turbo", max_tokens=40,
messages=messages, temperature=1,
max_tokens=40, stream=True,
temperature=1, )
stream=True, async for chunk in response1:
) response_1_content += chunk.choices[0].delta.content or ""
async for chunk in response1: print(response_1_content)
response_1_content += chunk.choices[0].delta.content or ""
print(response_1_content)
asyncio.run(call1())
time.sleep(0.5) time.sleep(0.5)
print("\n\n Response 1 content: ", response_1_content, "\n\n") print("\n\n Response 1 content: ", response_1_content, "\n\n")
async def call2(): response2 = await litellm.acompletion(
nonlocal response_2_content model="gpt-3.5-turbo",
response2 = await litellm.acompletion( messages=messages,
model="gpt-3.5-turbo", max_tokens=40,
messages=messages, temperature=1,
max_tokens=40, stream=True,
temperature=1, )
stream=True, async for chunk in response2:
) response_2_content += chunk.choices[0].delta.content or ""
async for chunk in response2: print(response_2_content)
response_2_content += chunk.choices[0].delta.content or ""
print(response_2_content)
asyncio.run(call2())
print("\nresponse 1", response_1_content) print("\nresponse 1", response_1_content)
print("\nresponse 2", response_2_content) print("\nresponse 2", response_2_content)
assert ( assert (
@@ -536,14 +529,15 @@ def test_redis_cache_acompletion_stream():
litellm.success_callback = [] litellm.success_callback = []
litellm._async_success_callback = [] litellm._async_success_callback = []
except Exception as e: except Exception as e:
print(e) print(f"{str(e)}\n\n{traceback.format_exc()}")
raise e raise e
# test_redis_cache_acompletion_stream() # test_redis_cache_acompletion_stream()
def test_redis_cache_acompletion_stream_bedrock(): @pytest.mark.asyncio
async def test_redis_cache_acompletion_stream_bedrock():
import asyncio import asyncio
try: try:
@@ -565,39 +559,33 @@ def test_redis_cache_acompletion_stream_bedrock():
response_1_content = "" response_1_content = ""
response_2_content = "" response_2_content = ""
async def call1(): response1 = await litellm.acompletion(
nonlocal response_1_content model="bedrock/anthropic.claude-v2",
response1 = await litellm.acompletion( messages=messages,
model="bedrock/anthropic.claude-v2", max_tokens=40,
messages=messages, temperature=1,
max_tokens=40, stream=True,
temperature=1, )
stream=True, async for chunk in response1:
) print(chunk)
async for chunk in response1: response_1_content += chunk.choices[0].delta.content or ""
print(chunk) print(response_1_content)
response_1_content += chunk.choices[0].delta.content or ""
print(response_1_content)
asyncio.run(call1())
time.sleep(0.5) time.sleep(0.5)
print("\n\n Response 1 content: ", response_1_content, "\n\n") print("\n\n Response 1 content: ", response_1_content, "\n\n")
async def call2(): response2 = await litellm.acompletion(
nonlocal response_2_content model="bedrock/anthropic.claude-v2",
response2 = await litellm.acompletion( messages=messages,
model="bedrock/anthropic.claude-v2", max_tokens=40,
messages=messages, temperature=1,
max_tokens=40, stream=True,
temperature=1, )
stream=True, async for chunk in response2:
) print(chunk)
async for chunk in response2: response_2_content += chunk.choices[0].delta.content or ""
print(chunk) print(response_2_content)
response_2_content += chunk.choices[0].delta.content or ""
print(response_2_content)
asyncio.run(call2())
print("\nresponse 1", response_1_content) print("\nresponse 1", response_1_content)
print("\nresponse 2", response_2_content) print("\nresponse 2", response_2_content)
assert ( assert (
@@ -612,8 +600,8 @@ def test_redis_cache_acompletion_stream_bedrock():
raise e raise e
@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio
def test_s3_cache_acompletion_stream_azure(): async def test_s3_cache_acompletion_stream_azure():
import asyncio import asyncio
try: try:
@@ -637,41 +625,35 @@ def test_s3_cache_acompletion_stream_azure():
response_1_created = "" response_1_created = ""
response_2_created = "" response_2_created = ""
async def call1(): response1 = await litellm.acompletion(
nonlocal response_1_content, response_1_created model="azure/chatgpt-v-2",
response1 = await litellm.acompletion( messages=messages,
model="azure/chatgpt-v-2", max_tokens=40,
messages=messages, temperature=1,
max_tokens=40, stream=True,
temperature=1, )
stream=True, async for chunk in response1:
) print(chunk)
async for chunk in response1: response_1_created = chunk.created
print(chunk) response_1_content += chunk.choices[0].delta.content or ""
response_1_created = chunk.created print(response_1_content)
response_1_content += chunk.choices[0].delta.content or ""
print(response_1_content)
asyncio.run(call1())
time.sleep(0.5) time.sleep(0.5)
print("\n\n Response 1 content: ", response_1_content, "\n\n") print("\n\n Response 1 content: ", response_1_content, "\n\n")
async def call2(): response2 = await litellm.acompletion(
nonlocal response_2_content, response_2_created model="azure/chatgpt-v-2",
response2 = await litellm.acompletion( messages=messages,
model="azure/chatgpt-v-2", max_tokens=40,
messages=messages, temperature=1,
max_tokens=40, stream=True,
temperature=1, )
stream=True, async for chunk in response2:
) print(chunk)
async for chunk in response2: response_2_content += chunk.choices[0].delta.content or ""
print(chunk) response_2_created = chunk.created
response_2_content += chunk.choices[0].delta.content or "" print(response_2_content)
response_2_created = chunk.created
print(response_2_content)
asyncio.run(call2())
print("\nresponse 1", response_1_content) print("\nresponse 1", response_1_content)
print("\nresponse 2", response_2_content) print("\nresponse 2", response_2_content)