forked from phoenix/litellm-mirror
test(test_caching.py): fix async tests
parent 8a20ea795b
commit 3072137739
2 changed files with 81 additions and 95 deletions
@@ -150,6 +150,9 @@ class RedisCache(BaseCache):
                 await redis_client.set(
                     name=key, value=json.dumps(value), ex=ttl, get=True
                 )
+                print_verbose(
+                    f"Successfully Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
+                )
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
             print_verbose(
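For reference, a minimal standalone sketch of the async write pattern this hunk adds logging around, assuming the redis-py asyncio client (redis.asyncio) that this RedisCache wraps; the URL, key, value, and function name below are purely illustrative:

import asyncio
import json

import redis.asyncio as async_redis


async def demo_async_set():
    # Illustrative connection; adjust the URL for your Redis instance.
    client = async_redis.Redis.from_url("redis://localhost:6379")
    key, value, ttl = "litellm-example-key", {"response": "hello"}, 60
    # Serialize and write with an expiry, mirroring the set() call in the hunk above.
    await client.set(name=key, value=json.dumps(value), ex=ttl)
    print(f"Successfully Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}")
    await client.close()


asyncio.run(demo_async_set())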
@@ -216,7 +219,7 @@ class RedisCache(BaseCache):
         _redis_client = self.init_async_client()
         async with _redis_client as redis_client:
             try:
-                print_verbose(f"Get Redis Cache: key: {key}")
+                print_verbose(f"Get Async Redis Cache: key: {key}")
                 cached_response = await redis_client.get(key)
                 print_verbose(
                     f"Got Async Redis Cache: key: {key}, cached_response {cached_response}"
@@ -225,8 +228,9 @@ class RedisCache(BaseCache):
                 return response
             except Exception as e:
                 # NON blocking - notify users Redis is throwing an exception
-                traceback.print_exc()
-                logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
+                print_verbose(
+                    f"LiteLLM Caching: async get() - Got exception from REDIS: {str(e)}"
+                )
 
     async def async_get_cache_pipeline(self, key_list) -> dict:
         """
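The intent behind the error-handling change above is that a Redis failure should never break the request path: the exception is reported and the caller falls through as if the cache had missed. A rough sketch of that non-blocking read pattern, using illustrative names rather than LiteLLM's actual API:

import json

import redis.asyncio as async_redis


async def non_blocking_get(client: async_redis.Redis, key: str):
    # Return the cached value for `key`, or None on a miss or Redis error.
    try:
        cached = await client.get(key)
        if cached is None:
            return None
        return json.loads(cached)
    except Exception as e:
        # NON blocking - log the error instead of raising, so callers
        # simply recompute the value as if the cache had missed.
        print(f"LiteLLM Caching: async get() - Got exception from REDIS: {str(e)}")
        return None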
@@ -474,11 +474,10 @@ def test_redis_cache_completion_stream():
 # test_redis_cache_completion_stream()
 
 
-def test_redis_cache_acompletion_stream():
-    import asyncio
-
+@pytest.mark.asyncio
+async def test_redis_cache_acompletion_stream():
     try:
-        litellm.set_verbose = False
+        litellm.set_verbose = True
         random_word = generate_random_word()
         messages = [
             {
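This hunk sets the pattern repeated through the rest of the test file: a synchronous test that built nested call1()/call2() coroutines and drove them with asyncio.run(...) becomes a native async test collected by pytest-asyncio, so the awaits can live inline. A minimal before/after sketch, with a stand-in await in place of the real litellm.acompletion(...) calls:

import asyncio

import pytest


# Before: a synchronous test that manually drives the event loop.
def test_cache_old_style():
    async def call1():
        await asyncio.sleep(0)  # stand-in for `await litellm.acompletion(...)`

    asyncio.run(call1())


# After: pytest-asyncio runs the coroutine itself, so no wrapper is needed.
@pytest.mark.asyncio
async def test_cache_new_style():
    await asyncio.sleep(0)  # stand-in for `await litellm.acompletion(...)`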
@@ -496,8 +495,6 @@ def test_redis_cache_acompletion_stream():
         response_1_content = ""
         response_2_content = ""
 
-        async def call1():
-            nonlocal response_1_content
         response1 = await litellm.acompletion(
             model="gpt-3.5-turbo",
             messages=messages,
@@ -509,12 +506,9 @@ def test_redis_cache_acompletion_stream():
             response_1_content += chunk.choices[0].delta.content or ""
         print(response_1_content)
 
-        asyncio.run(call1())
         time.sleep(0.5)
         print("\n\n Response 1 content: ", response_1_content, "\n\n")
 
-        async def call2():
-            nonlocal response_2_content
         response2 = await litellm.acompletion(
             model="gpt-3.5-turbo",
             messages=messages,
@@ -526,7 +520,6 @@ def test_redis_cache_acompletion_stream():
             response_2_content += chunk.choices[0].delta.content or ""
         print(response_2_content)
 
-        asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
         assert (
@@ -536,14 +529,15 @@ def test_redis_cache_acompletion_stream():
         litellm.success_callback = []
         litellm._async_success_callback = []
     except Exception as e:
-        print(e)
+        print(f"{str(e)}\n\n{traceback.format_exc()}")
         raise e
 
 
 # test_redis_cache_acompletion_stream()
 
 
-def test_redis_cache_acompletion_stream_bedrock():
+@pytest.mark.asyncio
+async def test_redis_cache_acompletion_stream_bedrock():
     import asyncio
 
     try:
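The print(e) to print(f"{str(e)}\n\n{traceback.format_exc()}") change above makes a failing test report the full stack trace rather than just the exception message; a tiny illustration of the difference, with a made-up error:

import traceback


def show_failure():
    try:
        raise ValueError("simulated cache failure")  # illustrative error
    except Exception as e:
        # str(e) is only the message; traceback.format_exc() returns the whole trace.
        print(f"{str(e)}\n\n{traceback.format_exc()}")


show_failure()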
@@ -565,8 +559,6 @@ def test_redis_cache_acompletion_stream_bedrock():
         response_1_content = ""
         response_2_content = ""
 
-        async def call1():
-            nonlocal response_1_content
         response1 = await litellm.acompletion(
             model="bedrock/anthropic.claude-v2",
             messages=messages,
@@ -579,12 +571,9 @@ def test_redis_cache_acompletion_stream_bedrock():
             response_1_content += chunk.choices[0].delta.content or ""
         print(response_1_content)
 
-        asyncio.run(call1())
         time.sleep(0.5)
         print("\n\n Response 1 content: ", response_1_content, "\n\n")
 
-        async def call2():
-            nonlocal response_2_content
         response2 = await litellm.acompletion(
             model="bedrock/anthropic.claude-v2",
             messages=messages,
@@ -597,7 +586,6 @@ def test_redis_cache_acompletion_stream_bedrock():
             response_2_content += chunk.choices[0].delta.content or ""
         print(response_2_content)
 
-        asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
         assert (
@@ -612,8 +600,8 @@ def test_redis_cache_acompletion_stream_bedrock():
         raise e
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
-def test_s3_cache_acompletion_stream_azure():
+@pytest.mark.asyncio
+async def test_s3_cache_acompletion_stream_azure():
     import asyncio
 
     try:
@@ -637,8 +625,6 @@ def test_s3_cache_acompletion_stream_azure():
         response_1_created = ""
         response_2_created = ""
 
-        async def call1():
-            nonlocal response_1_content, response_1_created
         response1 = await litellm.acompletion(
             model="azure/chatgpt-v-2",
             messages=messages,
@@ -652,12 +638,9 @@ def test_s3_cache_acompletion_stream_azure():
             response_1_content += chunk.choices[0].delta.content or ""
         print(response_1_content)
 
-        asyncio.run(call1())
         time.sleep(0.5)
         print("\n\n Response 1 content: ", response_1_content, "\n\n")
 
-        async def call2():
-            nonlocal response_2_content, response_2_created
         response2 = await litellm.acompletion(
             model="azure/chatgpt-v-2",
             messages=messages,
@@ -671,7 +654,6 @@ def test_s3_cache_acompletion_stream_azure():
             response_2_created = chunk.created
         print(response_2_content)
 
-        asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
 