Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 10:44:24 +00:00
Support caching on reasoning content + other fixes (#8973)
* fix(factory.py): pass on anthropic thinking content from assistant call
* fix(factory.py): fix anthropic messages to handle thinking blocks
  Fixes https://github.com/BerriAI/litellm/issues/8961
* fix(factory.py): fix bedrock handling for assistant content in messages
  Fixes https://github.com/BerriAI/litellm/issues/8961
* feat(convert_dict_to_response.py): handle reasoning content + thinking blocks in chat completion block
  Ensures caching works for anthropic thinking blocks (see the sketch after this list)
* fix(convert_dict_to_response.py): pass all message params to delta block
  Ensures the streaming delta also contains the reasoning content / thinking block
* test(test_prompt_factory.py): remove redundant test
  Anthropic now supports assistant as the first message
* fix(factory.py): fix linting errors
* fix: fix code qa
* test: remove falsy test
* fix(litellm_logging.py): fix str conversion
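A minimal sketch of the headline fix, assuming litellm's in-memory cache and a thinking-capable Anthropic model (the model name and prompt are placeholders): with caching enabled, a cache hit should now return the same reasoning content as the original response.

```python
import litellm

# Assumed setup: litellm's default in-memory cache.
litellm.cache = litellm.Cache()

kwargs = dict(
    model="anthropic/claude-3-7-sonnet-20250219",  # placeholder model name
    messages=[{"role": "user", "content": "What is 2 + 2? Think it through."}],
    thinking={"type": "enabled", "budget_tokens": 1024},
)

first = litellm.completion(**kwargs)
second = litellm.completion(**kwargs)  # identical params, served from cache

# Before this commit the cache hit could drop the thinking block; after it,
# both responses should expose the same reasoning content.
print(first.choices[0].message.reasoning_content)
print(second.choices[0].message.reasoning_content)
```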
This commit is contained in:
parent 4c8b4fefc9
commit 662c59adcf
11 changed files with 230 additions and 50 deletions
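For the streaming side of the change (the delta-block bullet above), a minimal sketch, again with a placeholder model name: `reasoning_content` on the streamed delta is the field the commit wires through alongside the normal `content`.

```python
import litellm

stream = litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",  # placeholder model name
    messages=[{"role": "user", "content": "Briefly, why is the sky blue?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
    stream=True,
)

for chunk in stream:
    delta = chunk.choices[0].delta
    # With this commit, thinking tokens arrive on the delta as
    # reasoning_content, separate from the regular answer text.
    if getattr(delta, "reasoning_content", None):
        print("[thinking]", delta.reasoning_content)
    if getattr(delta, "content", None):
        print(delta.content, end="")
```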
```diff
@@ -257,6 +257,8 @@ def test_aaparallel_function_call_with_anthropic_thinking(model):
            thinking={"type": "enabled", "budget_tokens": 1024},
        )  # get a new response from the model where it can see the function response
        print("second response\n", second_response)

        ## THIRD RESPONSE
    except litellm.InternalServerError as e:
        print(e)
    except litellm.RateLimitError as e:
```
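The hunk above is from a test that feeds the model's own thinking-bearing assistant turn back into a follow-up call. A minimal standalone sketch of that round trip, with placeholder model name and prompts:

```python
import litellm

model = "anthropic/claude-3-7-sonnet-20250219"  # placeholder model name
messages = [{"role": "user", "content": "Pick a number and explain why."}]

first = litellm.completion(
    model=model,
    messages=messages,
    thinking={"type": "enabled", "budget_tokens": 1024},
)

# Pass the assistant turn, including its thinking blocks, back verbatim.
# The factory.py fixes in this commit make this round trip work instead of
# erroring on the thinking content (issue #8961).
messages.append(first.choices[0].message)
messages.append({"role": "user", "content": "Now double it."})

second_response = litellm.completion(
    model=model,
    messages=messages,
    thinking={"type": "enabled", "budget_tokens": 1024},
)
print("second response\n", second_response)
```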