forked from phoenix/litellm-mirror
fix: n
This commit is contained in:
parent bd5a14daf6
commit df9df7b040

3 changed files with 58 additions and 111 deletions
@@ -81,7 +81,7 @@ class RedisCache(BaseCache):

     def set_cache(self, key, value, **kwargs):
         ttl = kwargs.get("ttl", None)
-        print_verbose(f"Set Redis Cache: key: {key}\nValue {value}")
+        print_verbose(f"Set Redis Cache: key: {key}\nValue {value}\nttl={ttl}")
         try:
             self.redis_client.set(name=key, value=str(value), ex=ttl)
         except Exception as e:
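As context for the hunk above, here is a minimal standalone sketch of the TTL pass-through pattern: a `ttl` keyword argument is read from `kwargs` and handed to redis-py's `ex=` expiry parameter. This is illustrative only, not litellm's actual class, and it assumes a reachable Redis instance and the `redis` package.

```python
import redis


class MiniRedisCache:
    """Toy cache illustrating how a ttl kwarg maps to Redis's ex= expiry."""

    def __init__(self, host="localhost", port=6379):
        # Assumes a local Redis server; adjust host/port as needed.
        self.redis_client = redis.Redis(host=host, port=port)

    def set_cache(self, key, value, **kwargs):
        ttl = kwargs.get("ttl", None)  # None means "no expiry" in redis-py
        print(f"Set Redis Cache: key: {key}\nValue {value}\nttl={ttl}")
        self.redis_client.set(name=key, value=str(value), ex=ttl)


# Usage (hypothetical): MiniRedisCache().set_cache("greeting", "hello", ttl=60)
```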
@@ -171,7 +171,7 @@ class S3Cache(BaseCache):
                 CacheControl=cache_control,
                 ContentType="application/json",
                 ContentLanguage="en",
-                ContentDisposition=f"inline; filename=\"{key}.json\""
+                ContentDisposition=f'inline; filename="{key}.json"',
             )
         else:
             cache_control = "immutable, max-age=31536000, s-maxage=31536000"
@@ -183,7 +183,7 @@ class S3Cache(BaseCache):
                 CacheControl=cache_control,
                 ContentType="application/json",
                 ContentLanguage="en",
-                ContentDisposition=f"inline; filename=\"{key}.json\""
+                ContentDisposition=f'inline; filename="{key}.json"',
             )
         except Exception as e:
             # NON blocking - notify users S3 is throwing an exception
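For reference, a hedged sketch of how a `ContentDisposition` header like the one in these two hunks could be passed in a standalone boto3 call. The bucket name, key, and body are placeholders, and AWS credentials are assumed to be configured in the environment; this is not litellm's exact code path.

```python
import json

import boto3

s3_client = boto3.client("s3")  # assumes credentials/region are already configured
key = "example-cache-key"       # placeholder key
value = {"cached": "response"}  # placeholder payload

# Both the old and new quoting styles produce the same header value:
#   inline; filename="example-cache-key.json"
# The new form simply avoids backslash escapes and adds the trailing comma.
s3_client.put_object(
    Bucket="example-bucket",  # placeholder bucket name
    Key=f"{key}.json",
    Body=json.dumps(value),
    CacheControl="immutable, max-age=31536000, s-maxage=31536000",
    ContentType="application/json",
    ContentLanguage="en",
    ContentDisposition=f'inline; filename="{key}.json"',
)
```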
@@ -495,7 +495,6 @@ class Cache:
                cached_result is not None
                and isinstance(cached_result, dict)
                and "timestamp" in cached_result
-                and max_age is not None
            ):
                timestamp = cached_result["timestamp"]
                current_time = time.time()
@@ -504,7 +503,7 @@ class Cache:
                response_age = current_time - timestamp

                # Check if the cached response is older than the max-age
-                if response_age > max_age:
+                if max_age is not None and response_age > max_age:
                    print_verbose(
                        f"Cached response for key {cache_key} is too old. Max-age: {max_age}s, Age: {response_age}s"
                    )
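Taken together, the two hunks above move the `max_age is not None` guard from the outer condition onto the inner comparison, so the timestamp branch always runs and freshness is only enforced when a max-age was actually supplied. A minimal standalone sketch of the resulting logic, with simplified names; the cache-miss return value is an assumption for illustration only.

```python
import time


def check_max_age(cached_result, cache_key, max_age=None):
    """Return the cached entry unless it is older than max_age (when given)."""
    if (
        cached_result is not None
        and isinstance(cached_result, dict)
        and "timestamp" in cached_result
    ):
        response_age = time.time() - cached_result["timestamp"]
        # Only enforce freshness when the caller actually supplied a max-age.
        if max_age is not None and response_age > max_age:
            print(
                f"Cached response for key {cache_key} is too old. "
                f"Max-age: {max_age}s, Age: {response_age}s"
            )
            return None  # treat as a cache miss (assumption in this sketch)
    return cached_result


# Usage: check_max_age({"timestamp": time.time() - 120, "response": "hi"}, "k", max_age=60)
```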
@@ -565,6 +564,9 @@ class Cache:
     async def _async_add_cache(self, result, *args, **kwargs):
         self.add_cache(result, *args, **kwargs)

+    async def _async_get_cache(self, *args, **kwargs):
+        return self.get_cache(*args, **kwargs)
+

 def enable_cache(
     type: Optional[Literal["local", "redis", "s3"]] = "local",
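The new `_async_get_cache` mirrors the existing `_async_add_cache`: an awaitable wrapper that simply delegates to the synchronous method, so awaiting it does not itself yield for the cache I/O. A toy illustration of that delegation pattern (not the litellm class itself):

```python
import asyncio


class ToyCache:
    """Toy stand-in for a cache with sync methods and async wrappers."""

    def __init__(self):
        self._store = {}

    def get_cache(self, key, **kwargs):
        return self._store.get(key)

    def add_cache(self, result, key, **kwargs):
        self._store[key] = result

    async def _async_add_cache(self, result, *args, **kwargs):
        self.add_cache(result, *args, **kwargs)

    async def _async_get_cache(self, *args, **kwargs):
        return self.get_cache(*args, **kwargs)


async def main():
    cache = ToyCache()
    await cache._async_add_cache("hello", key="greeting")
    print(await cache._async_get_cache(key="greeting"))  # -> hello


asyncio.run(main())
```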
@@ -267,10 +267,10 @@ async def acompletion(
         elif asyncio.iscoroutine(init_response):
             response = await init_response
         else:
-            response = init_response
+            response = init_response  # type: ignore
     else:
         # Call the synchronous function using run_in_executor
-        response = await loop.run_in_executor(None, func_with_context)
+        response = await loop.run_in_executor(None, func_with_context)  # type: ignore
         # if kwargs.get("stream", False): # return an async generator
         # return _async_streaming(
         # response=response,
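The `run_in_executor` branch above off-loads a synchronous completion call to a thread so it can be awaited; the added `# type: ignore` comments only silence the type checker and do not change behavior. A self-contained sketch of that pattern, with a stand-in blocking function:

```python
import asyncio
from functools import partial


def blocking_completion(prompt):
    # Stand-in for a synchronous SDK call that would otherwise block the event loop.
    return f"echo: {prompt}"


async def acompletion_sketch(prompt):
    loop = asyncio.get_running_loop()
    func_with_context = partial(blocking_completion, prompt)
    # Run the sync function in the default thread-pool executor and await the result.
    response = await loop.run_in_executor(None, func_with_context)
    return response


print(asyncio.run(acompletion_sketch("hey")))  # -> echo: hey
```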
@@ -11,10 +11,10 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import pytest
 import litellm
-from litellm import embedding, completion
+from litellm import embedding, completion, aembedding
 from litellm.caching import Cache
 import random
-import hashlib
+import hashlib, asyncio

 # litellm.set_verbose=True

@@ -261,6 +261,51 @@ def test_embedding_caching_azure():
 # test_embedding_caching_azure()


+@pytest.mark.asyncio
+async def test_embedding_caching_azure_individual_items():
+    """
+    Tests caching for individual items in an embedding list
+
+    Assert if the same embeddingresponse object is returned for the duplicate item in 2 embedding list calls
+
+    ```
+    embedding_1 = ["hey how's it going", "I'm doing well"]
+    embedding_val_1 = embedding(...)
+
+    embedding_2 = ["hey how's it going", "I'm fine"]
+    embedding_val_2 = embedding(...)
+
+    assert embedding_val_1[0]["id"] == embedding_val_2[0]["id"]
+    ```
+    """
+    litellm.cache = Cache()
+    common_msg = f"hey how's it going {uuid.uuid4()}"
+    embedding_1 = [common_msg, "I'm doing well"]
+    embedding_2 = [common_msg, "I'm fine"]
+
+    embedding_val_1 = await aembedding(
+        model="azure/azure-embedding-model", input=embedding_1, caching=True
+    )
+
+    embedding_val_2 = await aembedding(
+        model="azure/azure-embedding-model", input=embedding_2, caching=True
+    )
+    if (
+        embedding_val_2["data"][0]["embedding"]
+        != embedding_val_1["data"][0]["embedding"]
+    ):
+        print(f"embedding1: {embedding_val_1}")
+        print(f"embedding2: {embedding_val_2}")
+        pytest.fail("Error occurred: Embedding caching failed")
+    if (
+        embedding_val_2["data"][1]["embedding"]
+        == embedding_val_1["data"][1]["embedding"]
+    ):
+        print(f"embedding1: {embedding_val_1}")
+        print(f"embedding2: {embedding_val_2}")
+        pytest.fail("Error occurred: Embedding caching failed")
+
+
 def test_redis_cache_completion():
     litellm.set_verbose = False

@@ -401,14 +446,14 @@ def test_redis_cache_completion_stream():
     """


-test_redis_cache_completion_stream()
+# test_redis_cache_completion_stream()


 def test_redis_cache_acompletion_stream():
     import asyncio

     try:
-        litellm.set_verbose = True
+        litellm.set_verbose = False
         random_word = generate_random_word()
         messages = [
             {
@@ -436,7 +481,6 @@ def test_redis_cache_acompletion_stream():
             stream=True,
         )
         async for chunk in response1:
-            print(chunk)
             response_1_content += chunk.choices[0].delta.content or ""
         print(response_1_content)

@@ -454,7 +498,6 @@ def test_redis_cache_acompletion_stream():
             stream=True,
         )
         async for chunk in response2:
-            print(chunk)
             response_2_content += chunk.choices[0].delta.content or ""
         print(response_2_content)

@@ -916,101 +959,3 @@ def test_cache_context_managers():

 # test_cache_context_managers()

-# test_custom_redis_cache_params()
-
-# def test_redis_cache_with_ttl():
-#     cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
-#     sample_model_response_object_str = """{
-#       "choices": [
-#         {
-#           "finish_reason": "stop",
-#           "index": 0,
-#           "message": {
-#             "role": "assistant",
-#             "content": "I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
-#           }
-#         }
-#       ],
-#       "created": 1691429984.3852863,
-#       "model": "claude-instant-1",
-#       "usage": {
-#         "prompt_tokens": 18,
-#         "completion_tokens": 23,
-#         "total_tokens": 41
-#       }
-#     }"""
-#     sample_model_response_object = {
-#         "choices": [
-#             {
-#                 "finish_reason": "stop",
-#                 "index": 0,
-#                 "message": {
-#                     "role": "assistant",
-#                     "content": "I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
-#                 }
-#             }
-#         ],
-#         "created": 1691429984.3852863,
-#         "model": "claude-instant-1",
-#         "usage": {
-#             "prompt_tokens": 18,
-#             "completion_tokens": 23,
-#             "total_tokens": 41
-#         }
-#     }
-#     cache.add_cache(cache_key="test_key", result=sample_model_response_object_str, ttl=1)
-#     cached_value = cache.get_cache(cache_key="test_key")
-#     print(f"cached-value: {cached_value}")
-#     assert cached_value['choices'][0]['message']['content'] == sample_model_response_object['choices'][0]['message']['content']
-#     time.sleep(2)
-#     assert cache.get_cache(cache_key="test_key") is None
-
-# # test_redis_cache_with_ttl()
-
-# def test_in_memory_cache_with_ttl():
-#     cache = Cache(type="local")
-#     sample_model_response_object_str = """{
-#       "choices": [
-#         {
-#           "finish_reason": "stop",
-#           "index": 0,
-#           "message": {
-#             "role": "assistant",
-#             "content": "I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
-#           }
-#         }
-#       ],
-#       "created": 1691429984.3852863,
-#       "model": "claude-instant-1",
-#       "usage": {
-#         "prompt_tokens": 18,
-#         "completion_tokens": 23,
-#         "total_tokens": 41
-#       }
-#     }"""
-#     sample_model_response_object = {
-#         "choices": [
-#             {
-#                 "finish_reason": "stop",
-#                 "index": 0,
-#                 "message": {
-#                     "role": "assistant",
-#                     "content": "I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
-#                 }
-#             }
-#         ],
-#         "created": 1691429984.3852863,
-#         "model": "claude-instant-1",
-#         "usage": {
-#             "prompt_tokens": 18,
-#             "completion_tokens": 23,
-#             "total_tokens": 41
-#         }
-#     }
-#     cache.add_cache(cache_key="test_key", result=sample_model_response_object_str, ttl=1)
-#     cached_value = cache.get_cache(cache_key="test_key")
-#     assert cached_value['choices'][0]['message']['content'] == sample_model_response_object['choices'][0]['message']['content']
-#     time.sleep(2)
-#     assert cache.get_cache(cache_key="test_key") is None
-# # test_in_memory_cache_with_ttl()
-