fix(utils.py): only return cached streaming object for streaming calls

2025-04-25 10:44:24 +00:00 · 2024-02-21 21:27:40 -08:00 · 2024-02-21 21:27:40 -08:00 · fb2ae3a032
commit fb2ae3a032
parent f1742769a2
2 changed files with 10 additions and 8 deletions
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2330,13 +2330,13 @@ def client(original_function):
                                    model_response_object=ModelResponse(),
                                    stream=kwargs.get("stream", False),
                                )
-
-                                cached_result = CustomStreamWrapper(
-                                    completion_stream=cached_result,
-                                    model=model,
-                                    custom_llm_provider="cached_response",
-                                    logging_obj=logging_obj,
-                                )
+                                if kwargs.get("stream", False) == True:
+                                    cached_result = CustomStreamWrapper(
+                                        completion_stream=cached_result,
+                                        model=model,
+                                        custom_llm_provider="cached_response",
+                                        logging_obj=logging_obj,
+                                    )
                            elif call_type == CallTypes.embedding.value and isinstance(
                                cached_result, dict
                            ):