Support caching on reasoning content + other fixes (#8973)

* fix(factory.py): pass on anthropic thinking content from assistant call

* fix(factory.py): fix anthropic messages to handle thinking blocks

Fixes https://github.com/BerriAI/litellm/issues/8961

* fix(factory.py): fix bedrock handling for assistant content in messages

Fixes https://github.com/BerriAI/litellm/issues/8961

* feat(convert_dict_to_response.py): handle reasoning content + thinking blocks in chat completion block

ensures caching works for anthropic thinking block

* fix(convert_dict_to_response.py): pass all message params to delta block

ensures streaming delta also contains the reasoning content / thinking block

* test(test_prompt_factory.py): remove redundant test

anthropic now supports assistant as the first message

* fix(factory.py): fix linting errors

* fix: fix code qa

* test: remove falsy test

* fix(litellm_logging.py): fix str conversion
This commit is contained in:
Krish Dholakia 2025-03-04 21:12:16 -08:00 committed by GitHub
parent 4c8b4fefc9
commit 662c59adcf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 230 additions and 50 deletions

View file

@ -2561,3 +2561,30 @@ def test_redis_caching_multiple_namespaces():
# request 4 without a namespace should not be cached under the same key as request 3
assert response_4.id != response_3.id
def test_caching_with_reasoning_content():
    """
    Test that reasoning content is cached.

    Issues the same thinking-enabled completion request twice against a
    freshly constructed ``Cache()`` and asserts that the second response is
    reported as a cache hit and still exposes ``reasoning_content``.

    The module-level ``litellm.cache`` is restored to its previous value when
    the test finishes (pass or fail) so the cache enabled here does not leak
    into other tests.
    """
    import uuid

    # A random UUID makes the prompt unique per run; presumably this keeps the
    # first call from being answered by an entry cached by an earlier run.
    messages = [{"role": "user", "content": f"what is litellm? {uuid.uuid4()}"}]

    previous_cache = litellm.cache
    litellm.cache = Cache()
    try:
        # First call: only needed to populate the cache, so its return value
        # is intentionally not bound.
        completion(
            model="anthropic/claude-3-7-sonnet-latest",
            messages=messages,
            thinking={"type": "enabled", "budget_tokens": 1024},
        )
        # Second, identical call: expected to be served from the cache.
        response_2 = completion(
            model="anthropic/claude-3-7-sonnet-latest",
            messages=messages,
            thinking={"type": "enabled", "budget_tokens": 1024},
        )

        print(f"response 2: {response_2.model_dump_json(indent=4)}")
        assert response_2._hidden_params["cache_hit"] is True
        assert response_2.choices[0].message.reasoning_content is not None
    finally:
        # Undo the global mutation regardless of the test outcome.
        litellm.cache = previous_cache