fix - support text completion caching

2024-04-12 12:34:28 -07:00 · 2024-04-12 12:34:28 -07:00 · 0cef782ffa
commit 0cef782ffa
parent 11cd1ec6cf
1 changed file with 17 additions and 1 deletion
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2996,7 +2996,7 @@ def client(original_function):
                )
            ):  # allow users to control returning cached responses from the completion function
                # checking cache
-                print_verbose(f"INSIDE CHECKING CACHE")
+                print_verbose("INSIDE CHECKING CACHE")
                if (
                    litellm.cache is not None
                    and str(original_function.__name__)
@@ -3103,6 +3103,22 @@ def client(original_function):
                                    response_object=cached_result,
                                    model_response_object=ModelResponse(),
                                )
+                        if (
+                            call_type == CallTypes.atext_completion.value
+                            and isinstance(cached_result, dict)
+                        ):
+                            if kwargs.get("stream", False) == True:
+                                cached_result = convert_to_streaming_response_async(
+                                    response_object=cached_result,
+                                )
+                                cached_result = CustomStreamWrapper(
+                                    completion_stream=cached_result,
+                                    model=model,
+                                    custom_llm_provider="cached_response",
+                                    logging_obj=logging_obj,
+                                )
+                            else:
+                                cached_result = TextCompletionResponse(**cached_result)
                        elif call_type == CallTypes.aembedding.value and isinstance(
                            cached_result, dict
                        ):