From 0533f77138d5277ee48b93b0ae0fb1da0fe90852 Mon Sep 17 00:00:00 2001
From: Ajeet D'Souza <98ajeet@gmail.com>
Date: Wed, 28 Aug 2024 17:59:07 +0530
Subject: [PATCH] docs: add time.sleep() between streaming calls

LiteLLM's cache appears to be updated in the background. Without this
`time.sleep()` call, both responses take `0.8s` to return, but after
adding it, the second response returns in `0.006s`.
---
 docs/my-website/docs/caching/caching_api.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/my-website/docs/caching/caching_api.md b/docs/my-website/docs/caching/caching_api.md
index 969de441f..ff31c34ea 100644
--- a/docs/my-website/docs/caching/caching_api.md
+++ b/docs/my-website/docs/caching/caching_api.md
@@ -51,8 +51,10 @@ LiteLLM can cache your streamed responses for you
 ### Usage
 ```python
 import litellm
+import time
 from litellm import completion
 from litellm.caching import Cache
+
 litellm.cache = Cache(type="hosted")
 
 # Make completion calls
@@ -64,6 +66,7 @@ response1 = completion(
     model="gpt-3.5-turbo",
     messages=[{"role": "user", "content": "Tell me a joke."}],
     stream=True,
     caching=True)
 for chunk in response1:
     print(chunk)
 
+time.sleep(1) # cache is updated asynchronously
 response2 = completion(
     model="gpt-3.5-turbo",
     messages=[{"role": "user", "content": "Tell me a joke."}],
     stream=True,
     caching=True)
 for chunk in response2:
     print(chunk)
-```
\ No newline at end of file
+```
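
For reviewers who want to check the `0.8s` vs `0.006s` claim in the commit message, here is a minimal timing sketch. It reuses the exact call from the doc snippet above; the `timed_stream` helper and the use of `time.perf_counter` are illustrative additions rather than part of the doc, and running it assumes a valid OpenAI API key and a reachable hosted cache backend:

```python
import time

import litellm
from litellm import completion
from litellm.caching import Cache

litellm.cache = Cache(type="hosted")

def timed_stream(label: str) -> None:
    # Time one streamed, cache-enabled completion call end to end.
    start = time.perf_counter()
    response = completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Tell me a joke."}],
        stream=True,
        caching=True)
    for _ in response:
        pass  # drain the stream so the full response is generated
    print(f"{label}: {time.perf_counter() - start:.3f}s")

timed_stream("first call (uncached)")
time.sleep(1)  # per the commit message: the cache write lands in the background
timed_stream("second call (cached)")
```

If the cache write were synchronous, the `time.sleep(1)` would be unnecessary; it is only there to give the background cache update time to complete before the second call performs its lookup.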