fix(caching.py): hash the cache key to prevent key too long errors

This commit is contained in:
Krrish Dholakia 2023-12-29 15:03:33 +05:30
parent 6e68cd1125
commit 1e07f0fce8
3 changed files with 28 additions and 13 deletions

View file

@@ -1,6 +1,6 @@
# 🔥 Load Test LiteLLM # 🔥 Load Test LiteLLM
Here is a script to load test LiteLLM Here is a script to load test LiteLLM vs OpenAI
```python ```python
from openai import AsyncOpenAI, AsyncAzureOpenAI from openai import AsyncOpenAI, AsyncAzureOpenAI

View file

@@ -9,7 +9,7 @@
import litellm import litellm
import time, logging import time, logging
import json, traceback, ast import json, traceback, ast, hashlib
from typing import Optional, Literal, List from typing import Optional, Literal, List
@@ -301,7 +301,12 @@ class Cache:
param_value = kwargs[param] param_value = kwargs[param]
cache_key += f"{str(param)}: {str(param_value)}" cache_key += f"{str(param)}: {str(param_value)}"
print_verbose(f"\nCreated cache key: {cache_key}") print_verbose(f"\nCreated cache key: {cache_key}")
return cache_key # Use hashlib to create a sha256 hash of the cache key
hash_object = hashlib.sha256(cache_key.encode())
# Hexadecimal representation of the hash
hash_hex = hash_object.hexdigest()
print_verbose(f"Hashed cache key (SHA-256): {hash_hex}")
return hash_hex
def generate_streaming_content(self, content): def generate_streaming_content(self, content):
chunk_size = 5 # Adjust the chunk size as needed chunk_size = 5 # Adjust the chunk size as needed

View file

@@ -14,6 +14,7 @@ import litellm
from litellm import embedding, completion from litellm import embedding, completion
from litellm.caching import Cache from litellm.caching import Cache
import random import random
import hashlib
# litellm.set_verbose=True # litellm.set_verbose=True
@@ -656,12 +657,13 @@ def test_get_cache_key():
"litellm_logging_obj": {}, "litellm_logging_obj": {},
} }
) )
cache_key_str = "model: gpt-3.5-turbomessages: [{'role': 'user', 'content': 'write a one sentence poem about: 7510'}]temperature: 0.2max_tokens: 40"
hash_object = hashlib.sha256(cache_key_str.encode())
# Hexadecimal representation of the hash
hash_hex = hash_object.hexdigest()
assert cache_key == hash_hex
assert ( assert (
cache_key cache_key_2 == hash_hex
== "model: gpt-3.5-turbomessages: [{'role': 'user', 'content': 'write a one sentence poem about: 7510'}]temperature: 0.2max_tokens: 40"
)
assert (
cache_key == cache_key_2
), f"{cache_key} != {cache_key_2}. The same kwargs should have the same cache key across runs" ), f"{cache_key} != {cache_key_2}. The same kwargs should have the same cache key across runs"
embedding_cache_key = cache_instance.get_cache_key( embedding_cache_key = cache_instance.get_cache_key(
@@ -680,9 +682,14 @@ def test_get_cache_key():
print(embedding_cache_key) print(embedding_cache_key)
embedding_cache_key_str = (
"model: azure/azure-embedding-modelinput: ['hi who is ishaan']"
)
hash_object = hashlib.sha256(embedding_cache_key_str.encode())
# Hexadecimal representation of the hash
hash_hex = hash_object.hexdigest()
assert ( assert (
embedding_cache_key embedding_cache_key == hash_hex
== "model: azure/azure-embedding-modelinput: ['hi who is ishaan']"
), f"{embedding_cache_key} != 'model: azure/azure-embedding-modelinput: ['hi who is ishaan']'. The same kwargs should have the same cache key across runs" ), f"{embedding_cache_key} != 'model: azure/azure-embedding-modelinput: ['hi who is ishaan']'. The same kwargs should have the same cache key across runs"
# Proxy - embedding cache, test if embedding key, gets model_group and not model # Proxy - embedding cache, test if embedding key, gets model_group and not model
@@ -736,10 +743,13 @@ def test_get_cache_key():
) )
print(embedding_cache_key_2) print(embedding_cache_key_2)
assert ( embedding_cache_key_str_2 = (
embedding_cache_key_2 "model: EMBEDDING_MODEL_GROUPinput: ['hi who is ishaan']"
== "model: EMBEDDING_MODEL_GROUPinput: ['hi who is ishaan']"
) )
hash_object = hashlib.sha256(embedding_cache_key_str_2.encode())
# Hexadecimal representation of the hash
hash_hex = hash_object.hexdigest()
assert embedding_cache_key_2 == hash_hex
print("passed!") print("passed!")
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()