fix(utils.py): only return cached streaming object for streaming calls

2025-04-25 18:54:30 +00:00 · 2024-02-21 21:27:40 -08:00 · 2024-02-21 21:27:40 -08:00 · fb2ae3a032
commit fb2ae3a032
parent f1742769a2
2 changed files with 10 additions and 8 deletions
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -124,7 +124,9 @@ class RedisCache(BaseCache):
            self.redis_client.set(name=key, value=str(value), ex=ttl)
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
-            print_verbose("LiteLLM Caching: set() - Got exception from REDIS : ", e)
+            print_verbose(
+                f"LiteLLM Caching: set() - Got exception from REDIS : {str(e)}"
+            )
    async def async_set_cache(self, key, value, **kwargs):
        _redis_client = self.init_async_client()
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2330,13 +2330,13 @@ def client(original_function):
                                    model_response_object=ModelResponse(),
                                    stream=kwargs.get("stream", False),
                                )
-
+                                if kwargs.get("stream", False) == True:
-                                cached_result = CustomStreamWrapper(
+                                    cached_result = CustomStreamWrapper(
-                                    completion_stream=cached_result,
+                                        completion_stream=cached_result,
-                                    model=model,
+                                        model=model,
-                                    custom_llm_provider="cached_response",
+                                        custom_llm_provider="cached_response",
-                                    logging_obj=logging_obj,
+                                        logging_obj=logging_obj,
-                                )
+                                    )
                            elif call_type == CallTypes.embedding.value and isinstance(
                                cached_result, dict
                            ):