# litellm-mirror/tests/local_testing/test_unit_test_caching.py
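"""
Unit tests for litellm caching helpers: cache key generation for chat
completions, embeddings, and text completions, hashed keys and namespaces,
model param resolution, preset cache keys, and streaming content generation
from cached responses.
"""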
import os
import sys
import time
import traceback
import uuid
from dotenv import load_dotenv
load_dotenv()
sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import asyncio
import hashlib
import random
import pytest
import litellm
from litellm import aembedding, completion, embedding
from litellm.caching.caching import Cache
from unittest.mock import AsyncMock, patch, MagicMock
from litellm.caching.caching_handler import LLMCachingHandler, CachingHandlerResponse
from litellm.caching.caching import LiteLLMCacheType
from litellm.types.utils import CallTypes
from litellm.types.rerank import RerankResponse
from litellm.types.utils import (
    ModelResponse,
    EmbeddingResponse,
    TextCompletionResponse,
    TranscriptionResponse,
    Embedding,
)
from datetime import timedelta, datetime
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
from litellm.litellm_core_utils.model_param_helper import ModelParamHelper
from litellm._logging import verbose_logger
import logging


def test_get_kwargs_for_cache_key():
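    """Smoke test for ModelParamHelper._get_all_llm_api_params()."""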
    _cache = litellm.Cache()
    relevant_kwargs = ModelParamHelper._get_all_llm_api_params()
    print(relevant_kwargs)
    # basic sanity check so the test makes an assertion
    assert relevant_kwargs is not None


def test_get_cache_key_chat_completion():
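    """Chat completion cache keys differ when call params differ and match for identical kwargs."""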
    cache = Cache()
    kwargs = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hello, world!"}],
        "temperature": 0.7,
    }
    cache_key_1 = cache.get_cache_key(**kwargs)
    assert isinstance(cache_key_1, str)
    assert len(cache_key_1) > 0

    kwargs_2 = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hello, world!"}],
        "max_completion_tokens": 100,
    }
    cache_key_2 = cache.get_cache_key(**kwargs_2)
    assert cache_key_1 != cache_key_2

    kwargs_3 = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hello, world!"}],
        "max_completion_tokens": 100,
    }
    cache_key_3 = cache.get_cache_key(**kwargs_3)
    assert cache_key_2 == cache_key_3


def test_get_cache_key_embedding():
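    """Embedding cache keys differ when `dimensions` changes and match for identical kwargs."""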
    cache = Cache()
    kwargs = {
        "model": "text-embedding-3-small",
        "input": "Hello, world!",
        "dimensions": 1536,
    }
    cache_key_1 = cache.get_cache_key(**kwargs)
    assert isinstance(cache_key_1, str)
    assert len(cache_key_1) > 0

    kwargs_2 = {
        "model": "text-embedding-3-small",
        "input": "Hello, world!",
        "dimensions": 1539,
    }
    cache_key_2 = cache.get_cache_key(**kwargs_2)
    assert cache_key_1 != cache_key_2

    kwargs_3 = {
        "model": "text-embedding-3-small",
        "input": "Hello, world!",
        "dimensions": 1539,
    }
    cache_key_3 = cache.get_cache_key(**kwargs_3)
    assert cache_key_2 == cache_key_3


def test_get_cache_key_text_completion():
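    """Text completion cache keys differ when params like `best_of` change and match for identical kwargs."""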
    cache = Cache()
    kwargs = {
        "model": "gpt-3.5-turbo",
        "prompt": "Hello, world! here is a second line",
        "best_of": 3,
        "logit_bias": {"123": 1},
        "seed": 42,
    }
    cache_key_1 = cache.get_cache_key(**kwargs)
    assert isinstance(cache_key_1, str)
    assert len(cache_key_1) > 0

    kwargs_2 = {
        "model": "gpt-3.5-turbo",
        "prompt": "Hello, world! here is a second line",
        "best_of": 30,
    }
    cache_key_2 = cache.get_cache_key(**kwargs_2)
    assert cache_key_1 != cache_key_2

    kwargs_3 = {
        "model": "gpt-3.5-turbo",
        "prompt": "Hello, world! here is a second line",
        "best_of": 30,
    }
    cache_key_3 = cache.get_cache_key(**kwargs_3)
    assert cache_key_2 == cache_key_3


def test_get_hashed_cache_key():
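    """Cache._get_hashed_cache_key should return a SHA-256 hex digest (64 characters)."""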
    cache = Cache()
    cache_key = "model:gpt-3.5-turbo,messages:Hello world"
    hashed_key = Cache._get_hashed_cache_key(cache_key)
    assert len(hashed_key) == 64  # SHA-256 produces a 64-character hex string


def test_add_namespace_to_cache_key():
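    """Class-level, metadata `redis_namespace`, and cache-control namespaces are all prefixed onto the hashed key."""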
    cache = Cache(namespace="test_namespace")
    hashed_key = "abcdef1234567890"

    # Test with class-level namespace
    result = cache._add_namespace_to_cache_key(hashed_key)
    assert result == "test_namespace:abcdef1234567890"

    # Test with metadata namespace
    kwargs = {"metadata": {"redis_namespace": "custom_namespace"}}
    result = cache._add_namespace_to_cache_key(hashed_key, **kwargs)
    assert result == "custom_namespace:abcdef1234567890"

    # Test with cache control namespace
    kwargs = {"cache": {"namespace": "cache_control_namespace"}}
    result = cache._add_namespace_to_cache_key(hashed_key, **kwargs)
    assert result == "cache_control_namespace:abcdef1234567890"

    kwargs = {"cache": {"namespace": "cache_control_namespace-2"}}
    result = cache._add_namespace_to_cache_key(hashed_key, **kwargs)
    assert result == "cache_control_namespace-2:abcdef1234567890"


def test_get_model_param_value():
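    """The model value used for the cache key is resolved from `model`, `model_group`, and `caching_groups` metadata."""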
    cache = Cache()

    # Test with regular model
    kwargs = {"model": "gpt-3.5-turbo"}
    assert cache._get_model_param_value(kwargs) == "gpt-3.5-turbo"

    # Test with model_group
    kwargs = {"model": "gpt-3.5-turbo", "metadata": {"model_group": "gpt-group"}}
    assert cache._get_model_param_value(kwargs) == "gpt-group"

    # Test with caching_group
    kwargs = {
        "model": "gpt-3.5-turbo",
        "metadata": {
            "model_group": "openai-gpt-3.5-turbo",
            "caching_groups": [("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")],
        },
    }
    assert (
        cache._get_model_param_value(kwargs)
        == "('openai-gpt-3.5-turbo', 'azure-gpt-3.5-turbo')"
    )

    kwargs = {
        "model": "gpt-3.5-turbo",
        "metadata": {
            "model_group": "azure-gpt-3.5-turbo",
            "caching_groups": [("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")],
        },
    }
    assert (
        cache._get_model_param_value(kwargs)
        == "('openai-gpt-3.5-turbo', 'azure-gpt-3.5-turbo')"
    )

    kwargs = {
        "model": "gpt-3.5-turbo",
        "metadata": {
            "model_group": "not-in-caching-group-gpt-3.5-turbo",
            "caching_groups": [("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")],
        },
    }
    assert cache._get_model_param_value(kwargs) == "not-in-caching-group-gpt-3.5-turbo"


def test_preset_cache_key():
"""
Test that the preset cache key is used if it is set in kwargs["litellm_params"]
"""
cache = Cache()
kwargs = {
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": "Hello, world!"}],
"temperature": 0.7,
"litellm_params": {"preset_cache_key": "preset-cache-key"},
}
assert cache.get_cache_key(**kwargs) == "preset-cache-key"
def test_generate_streaming_content():
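    """generate_streaming_content yields assistant delta chunks (<= 5 chars each) that reconstruct the original content."""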
    cache = Cache()
    content = "Hello, this is a test message."
    generator = cache.generate_streaming_content(content)

    full_response = ""
    chunk_count = 0
    for chunk in generator:
        chunk_count += 1
        assert "choices" in chunk
        assert len(chunk["choices"]) == 1
        assert "delta" in chunk["choices"][0]
        assert "role" in chunk["choices"][0]["delta"]
        assert chunk["choices"][0]["delta"]["role"] == "assistant"
        assert "content" in chunk["choices"][0]["delta"]

        chunk_content = chunk["choices"][0]["delta"]["content"]
        full_response += chunk_content

        # Check that each chunk is no longer than 5 characters
        assert len(chunk_content) <= 5

    print("full_response from generate_streaming_content", full_response)

    # Check that the full content is reconstructed correctly
    assert full_response == content
    # Check that there were multiple chunks
    assert chunk_count > 1
    print(f"Number of chunks: {chunk_count}")