forked from phoenix/litellm-mirror
(feat) log semantic_sim to langfuse
This commit is contained in:
parent
c4e73768cf
commit
751fb1af89
1 changed file with 7 additions and 1 deletion
|
@@ -471,9 +471,11 @@ class RedisSemanticCache(BaseCache):
|
||||||
)
|
)
|
||||||
results = await self.index.aquery(query)
|
results = await self.index.aquery(query)
|
||||||
if results == None:
|
if results == None:
|
||||||
|
kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0
|
||||||
return None
|
return None
|
||||||
if isinstance(results, list):
|
if isinstance(results, list):
|
||||||
if len(results) == 0:
|
if len(results) == 0:
|
||||||
|
kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0
|
||||||
return None
|
return None
|
||||||
|
|
||||||
vector_distance = results[0]["vector_distance"]
|
vector_distance = results[0]["vector_distance"]
|
||||||
|
@@ -485,6 +487,10 @@ class RedisSemanticCache(BaseCache):
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"semantic cache: similarity threshold: {self.similarity_threshold}, similarity: {similarity}, prompt: {prompt}, closest_cached_prompt: {cached_prompt}"
|
f"semantic cache: similarity threshold: {self.similarity_threshold}, similarity: {similarity}, prompt: {prompt}, closest_cached_prompt: {cached_prompt}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# update kwargs["metadata"] with similarity, don't rewrite the original metadata
|
||||||
|
kwargs.setdefault("metadata", {})["semantic-similarity"] = similarity
|
||||||
|
|
||||||
if similarity > self.similarity_threshold:
|
if similarity > self.similarity_threshold:
|
||||||
# cache hit !
|
# cache hit !
|
||||||
cached_value = results[0]["response"]
|
cached_value = results[0]["response"]
|
||||||
|
@@ -968,7 +974,7 @@ class Cache:
|
||||||
"s-max-age", cache_control_args.get("s-maxage", float("inf"))
|
"s-max-age", cache_control_args.get("s-maxage", float("inf"))
|
||||||
)
|
)
|
||||||
cached_result = await self.cache.async_get_cache(
|
cached_result = await self.cache.async_get_cache(
|
||||||
cache_key, messages=messages
|
cache_key, *args, **kwargs
|
||||||
)
|
)
|
||||||
return self._get_cache_logic(
|
return self._get_cache_logic(
|
||||||
cached_result=cached_result, max_age=max_age
|
cached_result=cached_result, max_age=max_age
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue