temp test chromadb

ishaan-jaff 2023-08-10 15:22:38 -07:00
parent 5b9f1e1928
commit 8e7479a1a1
2 changed files with 39 additions and 38 deletions

@@ -16,7 +16,8 @@ jobs:
       pip install infisical
       pip install pytest
       pip install openai[datalib]
-      pip install -Uq chromadb
+      pip install -Uq chromadb==0.3.29
       # Run pytest and generate JUnit XML report
       - run:

@@ -1,44 +1,44 @@
-# import sys, os
-# import traceback
-# from dotenv import load_dotenv
-# load_dotenv()
-# import os
+import sys, os
+import traceback
+from dotenv import load_dotenv
+load_dotenv()
+import os
 
-# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
-# import pytest
-# import litellm
+sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+import pytest
+import litellm
 
-# # set cache to True
-# litellm.cache = True
-# litellm.cache_similarity_threshold = 0.5
+# set cache to True
+litellm.cache = True
+litellm.cache_similarity_threshold = 0.5
 
-# user_message = "Hello, whats the weather in San Francisco??"
-# messages = [{ "content": user_message,"role": "user"}]
+user_message = "Hello, whats the weather in San Francisco??"
+messages = [{ "content": user_message,"role": "user"}]
 
-# def test_completion_with_cache_gpt4():
-#     try:
-#         # in this test make the same call twice, measure the response time
-#         # the 2nd response time should be less than half of the first, ensuring that the cache is working
-#         import time
-#         start = time.time()
-#         print(litellm.cache)
-#         response = litellm.completion(model="gpt-4", messages=messages)
-#         end = time.time()
-#         first_call_time = end-start
-#         print(f"first call: {first_call_time}")
+def test_completion_with_cache_gpt4():
+    try:
+        # in this test make the same call twice, measure the response time
+        # the 2nd response time should be less than half of the first, ensuring that the cache is working
+        import time
+        start = time.time()
+        print(litellm.cache)
+        response = litellm.completion(model="gpt-4", messages=messages)
+        end = time.time()
+        first_call_time = end-start
+        print(f"first call: {first_call_time}")
 
-#         start = time.time()
-#         response = litellm.completion(model="gpt-4", messages=messages)
-#         end = time.time()
-#         second_call_time = end-start
-#         print(f"second call: {second_call_time}")
+        start = time.time()
+        response = litellm.completion(model="gpt-4", messages=messages)
+        end = time.time()
+        second_call_time = end-start
+        print(f"second call: {second_call_time}")
 
-#         if second_call_time > 1:
-#             # the 2nd call should be less than 1s
-#             pytest.fail(f"Cache is not working")
-#         # Add any assertions here to check the response
-#         print(response)
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+        if second_call_time > 1:
+            # the 2nd call should be less than 1s
+            pytest.fail(f"Cache is not working")
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
-# litellm.cache = False
+litellm.cache = False
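
For context, the test re-enabled above exercises litellm's similarity-based response cache: the same completion request is issued twice, and the second call is expected to return in under a second once it is served from the cache. Below is a minimal standalone sketch of that pattern outside pytest; it assumes only the caching API shown in this diff (litellm.cache, litellm.cache_similarity_threshold, litellm.completion) and an OPENAI_API_KEY in the environment.

```python
import os
import time

import litellm

# Assumes OPENAI_API_KEY is set in the environment for the gpt-4 calls.
assert os.environ.get("OPENAI_API_KEY"), "set OPENAI_API_KEY first"

litellm.cache = True                      # enable response caching
litellm.cache_similarity_threshold = 0.5  # near-duplicate prompts count as cache hits

messages = [{"role": "user", "content": "Hello, whats the weather in San Francisco??"}]

# First call goes out to the API and populates the cache.
start = time.time()
litellm.completion(model="gpt-4", messages=messages)
print(f"first call: {time.time() - start:.2f}s")

# An identical second call should be answered from the cache, much faster.
start = time.time()
litellm.completion(model="gpt-4", messages=messages)
second_call_time = time.time() - start
print(f"second call: {second_call_time:.2f}s")

if second_call_time > 1:
    raise RuntimeError("cache does not appear to be working")

litellm.cache = False  # reset the global flag, as the test file does
```

The one-second cutoff mirrors the test above; a wall-clock check like this is inherently sensitive to network and machine speed, which may be why the commit pins chromadb and is labeled a temporary test.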