From 0533f77138d5277ee48b93b0ae0fb1da0fe90852 Mon Sep 17 00:00:00 2001
From: Ajeet D'Souza <98ajeet@gmail.com>
Date: Wed, 28 Aug 2024 17:59:07 +0530
Subject: [PATCH] docs: add time.sleep() between streaming calls

LiteLLM's cache appears to be updated in the background. Without this
`time.sleep()` call, both responses take `0.8s` to return, but after
adding it, the second response returns in `0.006s`.
---
 docs/my-website/docs/caching/caching_api.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/my-website/docs/caching/caching_api.md b/docs/my-website/docs/caching/caching_api.md
index 969de441f..ff31c34ea 100644
--- a/docs/my-website/docs/caching/caching_api.md
+++ b/docs/my-website/docs/caching/caching_api.md
@@ -51,8 +51,10 @@ LiteLLM can cache your streamed responses for you
 ### Usage
 ```python
 import litellm
+import time
 from litellm import completion
 from litellm.caching import Cache
+
 litellm.cache = Cache(type="hosted")
 
 # Make completion calls
@@ -64,6 +66,7 @@ response1 = completion(
     model="gpt-3.5-turbo",
     messages=[{"role": "user", "content": "Tell me a joke."}],
     stream=True,
     caching=True)
 for chunk in response1:
     print(chunk)
 
+time.sleep(1) # cache is updated asynchronously
 response2 = completion(
     model="gpt-3.5-turbo",
     messages=[{"role": "user", "content": "Tell me a joke."}],
     stream=True,
     caching=True)
 for chunk in response2:
     print(chunk)
-```
\ No newline at end of file
+```
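
For reviewers who want to check the `0.8s` vs `0.006s` claim in the commit message, here is a minimal timing sketch. It reuses the exact call from the doc snippet above; the `timed_stream` helper and the use of `time.perf_counter` are illustrative additions rather than part of the doc, and running it assumes a valid OpenAI API key and a reachable hosted cache backend:

```python
import time

import litellm
from litellm import completion
from litellm.caching import Cache

litellm.cache = Cache(type="hosted")

def timed_stream(label: str) -> None:
    # Time one streamed, cache-enabled completion call end to end.
    start = time.perf_counter()
    response = completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Tell me a joke."}],
        stream=True,
        caching=True)
    for _ in response:
        pass  # drain the stream so the full response is generated
    print(f"{label}: {time.perf_counter() - start:.3f}s")

timed_stream("first call (uncached)")
time.sleep(1)  # per the commit message: the cache write lands in the background
timed_stream("second call (cached)")
```

If the cache write were synchronous, the `time.sleep(1)` would be unnecessary; it is only there to give the background cache update time to complete before the second call performs its lookup.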