Support caching on reasoning content + other fixes (#8973)

* fix(factory.py): pass on anthropic thinking content from assistant call

* fix(factory.py): fix anthropic messages to handle thinking blocks

Fixes https://github.com/BerriAI/litellm/issues/8961
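
For readers unfamiliar with the shapes involved, here is a rough Python sketch. The field names (`reasoning_content`, `thinking_blocks`) and values are illustrative assumptions, not copied from this diff:

```python
# Sketch only: an OpenAI-format assistant turn carrying extended-thinking
# output. Field names and values here are illustrative, not from the diff.
assistant_message = {
    "role": "assistant",
    "content": "The answer is 42.",
    "reasoning_content": "Let me work through this step by step...",
    "thinking_blocks": [
        {
            "type": "thinking",
            "thinking": "Let me work through this step by step...",
            "signature": "EqQBCk...",  # opaque signature returned by Anthropic
        }
    ],
}

# When translated back to Anthropic's /v1/messages format, the thinking block
# has to be re-emitted as a content block on the assistant turn, typically
# ahead of the text block:
anthropic_assistant_content = [
    {"type": "thinking", "thinking": "Let me work through this step by step...", "signature": "EqQBCk..."},
    {"type": "text", "text": "The answer is 42."},
]
```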

* fix(factory.py): fix bedrock handling for assistant content in messages

Fixes https://github.com/BerriAI/litellm/issues/8961
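
Bedrock's Converse API carries the same information in a `reasoningContent` block; a sketch of the assumed target shape (based on the Converse API's documented block layout, not on this diff):

```python
# Sketch: the same assistant turn expressed as Bedrock Converse content blocks,
# assuming the documented reasoningContent/reasoningText layout.
bedrock_assistant_content = [
    {
        "reasoningContent": {
            "reasoningText": {
                "text": "Let me work through this step by step...",
                "signature": "EqQBCk...",
            }
        }
    },
    {"text": "The answer is 42."},
]
```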

* feat(convert_dict_to_response.py): handle reasoning content + thinking blocks in chat completion block

ensures caching works for anthropic thinking block
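
Roughly, the point is that when a cached response dict is rebuilt into a chat-completion object, the reasoning fields on the message have to survive the round trip, otherwise a cache hit silently drops the thinking output. A minimal sketch with plain dicts and a hypothetical helper name:

```python
def rebuild_message_from_cache(cached_msg: dict) -> dict:
    # Hypothetical helper: keep the standard fields, then pass the reasoning
    # fields through if the cached entry has them, instead of dropping them.
    rebuilt = {
        "role": cached_msg.get("role", "assistant"),
        "content": cached_msg.get("content"),
    }
    for extra in ("reasoning_content", "thinking_blocks"):
        if cached_msg.get(extra) is not None:
            rebuilt[extra] = cached_msg[extra]
    return rebuilt


cached = {
    "role": "assistant",
    "content": "The answer is 42.",
    "reasoning_content": "Step by step...",
    "thinking_blocks": [
        {"type": "thinking", "thinking": "Step by step...", "signature": "..."}
    ],
}
assert "thinking_blocks" in rebuild_message_from_cache(cached)
```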

* fix(convert_dict_to_response.py): pass all message params to delta block

ensures streaming delta also contains the reasoning content / thinking block
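
A sketch of the idea (not the exact diff): build the streaming delta from every populated field on the message rather than a fixed subset, so new fields such as reasoning content propagate automatically:

```python
# Sketch: derive the delta from the full message payload, so anything new on
# the message (reasoning_content, thinking_blocks, ...) is carried into the
# streaming chunk without listing each field by hand.
message = {
    "role": "assistant",
    "content": "The answer is 42.",
    "reasoning_content": "Step by step...",
    "thinking_blocks": None,
}
delta = {k: v for k, v in message.items() if v is not None}
assert "reasoning_content" in delta and "thinking_blocks" not in delta
```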

* test(test_prompt_factory.py): remove redundant test

anthropic now supports assistant as the first message

* fix(factory.py): fix linting errors

* fix: fix code qa

* test: remove falsy test

* fix(litellm_logging.py): fix str conversion
Krish Dholakia 2025-03-04 21:12:16 -08:00 committed by GitHub
parent 4c8b4fefc9
commit 662c59adcf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 230 additions and 50 deletions


@@ -247,7 +247,6 @@ class LLMCachingHandler:
            pass
        else:
            call_type = original_function.__name__
            cached_result = self._convert_cached_result_to_model_response(
                cached_result=cached_result,
                call_type=call_type,
@@ -725,6 +724,7 @@ class LLMCachingHandler:
"""
Sync internal method to add the result to the cache
"""
new_kwargs = kwargs.copy()
new_kwargs.update(
convert_args_to_kwargs(
@@ -738,6 +738,7 @@ class LLMCachingHandler:
        if self._should_store_result_in_cache(
            original_function=self.original_function, kwargs=new_kwargs
        ):
            litellm.cache.add_cache(result, **new_kwargs)
        return
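
For context on the `new_kwargs = kwargs.copy()` pattern in the hunks above: `dict.update()` mutates in place, so copying first keeps cache-related keys from leaking back into the caller's kwargs. A small self-contained illustration (function and key names are hypothetical):

```python
def add_to_cache_bad(result, kwargs: dict) -> dict:
    # Mutates the caller's dict: cache bookkeeping keys leak back out.
    kwargs.update({"cache_key": "abc123"})  # "cache_key" is a made-up key
    return kwargs


def add_to_cache_good(result, kwargs: dict) -> dict:
    # Copy first, then update: the caller's kwargs stay untouched.
    new_kwargs = kwargs.copy()
    new_kwargs.update({"cache_key": "abc123"})
    return new_kwargs


caller_kwargs = {"model": "claude-3-7-sonnet-20250219", "temperature": 0.2}
add_to_cache_good(None, caller_kwargs)
assert "cache_key" not in caller_kwargs  # original dict left intact
```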