Support caching on reasoning content + other fixes (#8973)

* fix(factory.py): pass on anthropic thinking content from assistant call
* fix(factory.py): fix anthropic messages to handle thinking blocks
  Fixes https://github.com/BerriAI/litellm/issues/8961
* fix(factory.py): fix bedrock handling for assistant content in messages
  Fixes https://github.com/BerriAI/litellm/issues/8961
* feat(convert_dict_to_response.py): handle reasoning content + thinking blocks in chat completion block
  ensures caching works for anthropic thinking block
* fix(convert_dict_to_response.py): pass all message params to delta block
  ensures streaming delta also contains the reasoning content / thinking block
* test(test_prompt_factory.py): remove redundant test
  anthropic now supports assistant as the first message
* fix(factory.py): fix linting errors
* fix: fix code qa
* test: remove falsy test
* fix(litellm_logging.py): fix str conversion
2025-04-26 03:04:13 +00:00 · 2025-03-04 21:12:16 -08:00 · 2025-03-04 21:12:16 -08:00 · 662c59adcf
commit 662c59adcf
parent 4c8b4fefc9
11 changed files with 230 additions and 50 deletions
--- a/tests/local_testing/test_caching.py
+++ b/tests/local_testing/test_caching.py
@@ -2561,3 +2561,30 @@ def test_redis_caching_multiple_namespaces():

    # request 4 without a namespace should not be cached under the same key as request 3
    assert response_4.id != response_3.id
+
+
+def test_caching_with_reasoning_content():
+    """
+    Test that reasoning content is cached
+    """
+
+    import uuid
+
+    messages = [{"role": "user", "content": f"what is litellm? {uuid.uuid4()}"}]
+    litellm.cache = Cache()
+
+    response_1 = completion(
+        model="anthropic/claude-3-7-sonnet-latest",
+        messages=messages,
+        thinking={"type": "enabled", "budget_tokens": 1024},
+    )
+
+    response_2 = completion(
+        model="anthropic/claude-3-7-sonnet-latest",
+        messages=messages,
+        thinking={"type": "enabled", "budget_tokens": 1024},
+    )
+
+    print(f"response 2: {response_2.model_dump_json(indent=4)}")
+    assert response_2._hidden_params["cache_hit"] == True
+    assert response_2.choices[0].message.reasoning_content is not None