mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
Support caching on reasoning content + other fixes (#8973)
* fix(factory.py): pass on anthropic thinking content from assistant call
* fix(factory.py): fix anthropic messages to handle thinking blocks
  Fixes https://github.com/BerriAI/litellm/issues/8961
* fix(factory.py): fix bedrock handling for assistant content in messages
  Fixes https://github.com/BerriAI/litellm/issues/8961
* feat(convert_dict_to_response.py): handle reasoning content + thinking blocks in chat completion block
  ensures caching works for anthropic thinking block
* fix(convert_dict_to_response.py): pass all message params to delta block
  ensures streaming delta also contains the reasoning content / thinking block
* test(test_prompt_factory.py): remove redundant test
  anthropic now supports assistant as the first message
* fix(factory.py): fix linting errors
* fix: fix code qa
* test: remove falsy test
* fix(litellm_logging.py): fix str conversion
This commit is contained in:
parent
4c8b4fefc9
commit
662c59adcf
11 changed files with 230 additions and 50 deletions
|
@@ -2561,3 +2561,30 @@ def test_redis_caching_multiple_namespaces():
|
|||
|
||||
# request 4 without a namespace should not be cached under the same key as request 3
|
||||
assert response_4.id != response_3.id
|
||||
|
||||
|
||||
def test_caching_with_reasoning_content():
    """
    Test that reasoning content is cached.

    Issues the same thinking-enabled completion request twice with a fresh
    `Cache()` installed on `litellm.cache`, then checks that the second
    response is reported as a cache hit and still carries
    `reasoning_content` on its first choice's message.
    """
    import uuid

    # A random suffix makes the prompt unique per run, so the first call
    # cannot be served from an entry written by an earlier run or test.
    messages = [{"role": "user", "content": f"what is litellm? {uuid.uuid4()}"}]

    # `litellm.cache` is module-global state: remember the prior value and
    # restore it afterwards so this test does not leave caching switched on
    # for tests that run after it (even when an assertion below fails).
    previous_cache = litellm.cache
    litellm.cache = Cache()
    try:
        # First call populates the cache; its result is not inspected.
        completion(
            model="anthropic/claude-3-7-sonnet-latest",
            messages=messages,
            thinking={"type": "enabled", "budget_tokens": 1024},
        )

        # Identical request: expected to be answered from the cache.
        response_2 = completion(
            model="anthropic/claude-3-7-sonnet-latest",
            messages=messages,
            thinking={"type": "enabled", "budget_tokens": 1024},
        )

        print(f"response 2: {response_2.model_dump_json(indent=4)}")
        assert response_2._hidden_params["cache_hit"] is True
        assert response_2.choices[0].message.reasoning_content is not None
    finally:
        litellm.cache = previous_cache
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue