diff --git a/.circleci/config.yml b/.circleci/config.yml
index 2ec13464b..e4ed3a257 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -16,7 +16,8 @@ jobs:
             pip install infisical
             pip install pytest
             pip install openai[datalib]
-            pip install -Uq chromadb
+            pip install -Uq chromadb==0.3.29
+
 
       # Run pytest and generate JUnit XML report
       - run:
diff --git a/litellm/tests/test_cache.py b/litellm/tests/test_cache.py
index d97d19142..05004d1a6 100644
--- a/litellm/tests/test_cache.py
+++ b/litellm/tests/test_cache.py
@@ -1,44 +1,44 @@
-# import sys, os
-# import traceback
-# from dotenv import load_dotenv
-# load_dotenv()
-# import os
+import sys, os
+import traceback
+from dotenv import load_dotenv
+load_dotenv()
+import os
 
-# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
-# import pytest
-# import litellm
+sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+import pytest
+import litellm
 
-# # set cache to True
-# litellm.cache = True
-# litellm.cache_similarity_threshold = 0.5
+# set cache to True
+litellm.cache = True
+litellm.cache_similarity_threshold = 0.5
 
-# user_message = "Hello, whats the weather in San Francisco??"
-# messages = [{ "content": user_message,"role": "user"}]
+user_message = "Hello, whats the weather in San Francisco??"
+messages = [{ "content": user_message,"role": "user"}]
 
-# def test_completion_with_cache_gpt4():
-#     try:
-#         # in this test make the same call twice, measure the response time
-#         # the 2nd response time should be less than half of the first, ensuring that the cache is working
-#         import time
-#         start = time.time()
-#         print(litellm.cache)
-#         response = litellm.completion(model="gpt-4", messages=messages)
-#         end = time.time()
-#         first_call_time = end-start
-#         print(f"first call: {first_call_time}")
+def test_completion_with_cache_gpt4():
+    try:
+        # in this test make the same call twice, measure the response time
+        # the 2nd response time should be less than half of the first, ensuring that the cache is working
+        import time
+        start = time.time()
+        print(litellm.cache)
+        response = litellm.completion(model="gpt-4", messages=messages)
+        end = time.time()
+        first_call_time = end-start
+        print(f"first call: {first_call_time}")
 
-#         start = time.time()
-#         response = litellm.completion(model="gpt-4", messages=messages)
-#         end = time.time()
-#         second_call_time = end-start
-#         print(f"second call: {second_call_time}")
+        start = time.time()
+        response = litellm.completion(model="gpt-4", messages=messages)
+        end = time.time()
+        second_call_time = end-start
+        print(f"second call: {second_call_time}")
 
-#         if second_call_time > 1:
-#             # the 2nd call should be less than 1s
-#             pytest.fail(f"Cache is not working")
-#         # Add any assertions here to check the response
-#         print(response)
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+        if second_call_time > 1:
+            # the 2nd call should be less than 1s
+            pytest.fail(f"Cache is not working")
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
-# litellm.cache = False
\ No newline at end of file
+litellm.cache = False
\ No newline at end of file