From 8425a8ba222b77bae2e390ae3438c1214a5d1872 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 7 Feb 2024 19:21:50 -0800
Subject: [PATCH] (fix) track cost for semantic_caching, place on langfuse trace

---
 litellm/caching.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/litellm/caching.py b/litellm/caching.py
index f996a5873..3522a9d43 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -427,10 +427,16 @@ class RedisSemanticCache(BaseCache):
             else []
         )
         if llm_router is not None and self.embedding_model in router_model_names:
+            user_api_key = kwargs.get("metadata", {}).get("user_api_key", "")
             embedding_response = await llm_router.aembedding(
                 model=self.embedding_model,
                 input=prompt,
                 cache={"no-store": True, "no-cache": True},
+                metadata={
+                    "user_api_key": user_api_key,
+                    "semantic-cache-embedding": True,
+                    "trace_id": kwargs.get("metadata", {}).get("trace_id", None),
+                },
             )
         else:
             # convert to embedding
@@ -476,13 +482,20 @@ class RedisSemanticCache(BaseCache):
             else []
         )
         if llm_router is not None and self.embedding_model in router_model_names:
+            user_api_key = kwargs.get("metadata", {}).get("user_api_key", "")
             embedding_response = await llm_router.aembedding(
                 model=self.embedding_model,
                 input=prompt,
                 cache={"no-store": True, "no-cache": True},
+                metadata={
+                    "user_api_key": user_api_key,
+                    "semantic-cache-embedding": True,
+                    "trace_id": kwargs.get("metadata", {}).get("trace_id", None),
+                },
             )
         else:
             # convert to embedding
+            user_api_key = kwargs["litellm_params"]["metadata"].get("user_api_key", "")
             embedding_response = await litellm.aembedding(
                 model=self.embedding_model,
                 input=prompt,
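
Note on the two metadata lookups in this patch: the router paths read
kwargs.get("metadata", {}).get("user_api_key", ""), which degrades gracefully
when no metadata was passed, while the direct-call path indexes
kwargs["litellm_params"]["metadata"] and will raise a KeyError if either key
is missing. A minimal sketch of a defensive lookup covering both shapes
(the helper name _get_request_metadata is illustrative, not part of this
patch or the litellm API):

    def _get_request_metadata(kwargs: dict) -> dict:
        # Prefer the top-level metadata dict (router path); fall back to
        # litellm_params.metadata (direct litellm.aembedding path).
        metadata = kwargs.get("metadata")
        if metadata is None:
            metadata = kwargs.get("litellm_params", {}).get("metadata")
        return metadata or {}

    user_api_key = _get_request_metadata(kwargs).get("user_api_key", "")
    trace_id = _get_request_metadata(kwargs).get("trace_id", None)

Passing the same trace_id on the embedding call is what lets the logging
integration attach the embedding's cost to the original request's trace
rather than creating a detached one.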