forked from phoenix/litellm-mirror
(test) semantic caching
This commit is contained in:
parent cf4bd1cf4e
commit 81f8ac00b2
1 changed file with 17 additions and 9 deletions
@@ -995,21 +995,29 @@ def test_redis_semantic_cache_completion():
     random_number = random.randint(
         1, 100000
     )  # add a random number to ensure it's always adding / reading from cache
-    messages = [
-        {"role": "user", "content": f"write a one sentence poem about: {random_number}"}
-    ]
+    print("testing semantic caching")
     litellm.cache = Cache(
         type="redis-semantic",
         host=os.environ["REDIS_HOST"],
         port=os.environ["REDIS_PORT"],
         password=os.environ["REDIS_PASSWORD"],
-        similarity_threshold=0.5,
+        similarity_threshold=0.8,
     )
-    print("test2 for Redis Caching - non streaming")
-    response1 = completion(model="gpt-3.5-turbo", messages=messages, max_tokens=20)
-    # response2 = completion(
-    #     model="gpt-3.5-turbo", messages=messages,max_tokens=20
-    # )
+    response1 = completion(
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "user",
+                "content": f"write a one sentence poem about: {random_number}",
+            }
+        ],
+        max_tokens=20,
+    )
+    print(f"response1: {response1}")
+
+    assert response1.id == "chatcmpl-8p5GejSWLJ1pDI1lfhc6Idhwd2bDJ"
+    # assert response1.choices[0].message == 1
+
 
 
 # test_redis_cache_completion()
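For context, a minimal sketch (not part of this commit) of the pattern the test exercises, assuming REDIS_HOST/REDIS_PORT/REDIS_PASSWORD are set and the Redis server supports vector search; the second call and the id comparison here are illustrative:

import os
import litellm
from litellm import completion
from litellm.caching import Cache

# Route completions through litellm's redis-semantic cache.
litellm.cache = Cache(
    type="redis-semantic",
    host=os.environ["REDIS_HOST"],
    port=os.environ["REDIS_PORT"],
    password=os.environ["REDIS_PASSWORD"],
    similarity_threshold=0.8,  # higher = prompts must be more similar to count as a hit
)

messages = [{"role": "user", "content": "write a one sentence poem about: 42"}]

# The first call populates the cache; an identical (or semantically
# similar) prompt should then be served from the cache.
response1 = completion(model="gpt-3.5-turbo", messages=messages, max_tokens=20)
response2 = completion(model="gpt-3.5-turbo", messages=messages, max_tokens=20)

# A cache hit returns the stored response, so the response ids match.
assert response1.id == response2.id

The commit also tightens similarity_threshold from 0.5 to 0.8, so only closer prompt matches are treated as cache hits.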