Merge pull request #2991 from BerriAI/litellm_fix_text_completion_caching

[Feat] Support + Test caching for TextCompletion
2025-04-26 11:14:04 +00:00 · 2024-04-12 20:08:01 -07:00 · 2024-04-12 20:08:01 -07:00 · 0540ca4918
commit 0540ca4918
parent 74aa230eac 0cef782ffa
2 changed files with 51 additions and 1 deletions
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@ -707,6 +707,40 @@ async def test_redis_cache_acompletion_stream():
 # test_redis_cache_acompletion_stream()


+@pytest.mark.asyncio
+async def test_redis_cache_atext_completion():
+    try:
+        litellm.set_verbose = True
+        prompt = f"write a one sentence poem about: {uuid.uuid4()}"
+        litellm.cache = Cache(
+            type="redis",
+            host=os.environ["REDIS_HOST"],
+            port=os.environ["REDIS_PORT"],
+            password=os.environ["REDIS_PASSWORD"],
+            supported_call_types=["atext_completion"],
+        )
+        print("test for caching, atext_completion")
+
+        response1 = await litellm.atext_completion(
+            model="gpt-3.5-turbo-instruct", prompt=prompt, max_tokens=40, temperature=1
+        )
+
+        await asyncio.sleep(0.5)
+        print("\n\n Response 1 content: ", response1, "\n\n")
+
+        response2 = await litellm.atext_completion(
+            model="gpt-3.5-turbo-instruct", prompt=prompt, max_tokens=40, temperature=1
+        )
+
+        print(response2)
+
+        assert response1 == response2
+        assert response1.choices == response2.choices
+    except Exception as e:
+        print(f"{str(e)}\n\n{traceback.format_exc()}")
+        raise e
+
+
@pytest.mark.asyncio
 async def test_redis_cache_acompletion_stream_bedrock():
    import asyncio
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -2996,7 +2996,7 @@ def client(original_function):
                )
            ):  # allow users to control returning cached responses from the completion function
                # checking cache
-                print_verbose(f"INSIDE CHECKING CACHE")
+                print_verbose("INSIDE CHECKING CACHE")
                if (
                    litellm.cache is not None
                    and str(original_function.__name__)
@ -3103,6 +3103,22 @@ def client(original_function):
                                    response_object=cached_result,
                                    model_response_object=ModelResponse(),
                                )
+                        if (
+                            call_type == CallTypes.atext_completion.value
+                            and isinstance(cached_result, dict)
+                        ):
+                            if kwargs.get("stream", False) == True:
+                                cached_result = convert_to_streaming_response_async(
+                                    response_object=cached_result,
+                                )
+                                cached_result = CustomStreamWrapper(
+                                    completion_stream=cached_result,
+                                    model=model,
+                                    custom_llm_provider="cached_response",
+                                    logging_obj=logging_obj,
+                                )
+                            else:
+                                cached_result = TextCompletionResponse(**cached_result)
                        elif call_type == CallTypes.aembedding.value and isinstance(
                            cached_result, dict
                        ):