Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00
Gemini-2.5-flash - support reasoning cost calc + return reasoning content (#10141)
* build(model_prices_and_context_window.json): add vertex ai gemini-2.5-flash pricing
* build(model_prices_and_context_window.json): add gemini reasoning token pricing
* fix(vertex_and_google_ai_studio_gemini.py): support counting thinking tokens for gemini, allowing accurate cost calc
* fix(utils.py): add reasoning token cost calc to generic cost calc, ensuring gemini-2.5-flash cost calculation is accurate
* build(model_prices_and_context_window.json): mark gemini-2.5-flash as 'supports_reasoning'
* feat(gemini/): support 'thinking' + 'reasoning_effort' params + new unit tests, allowing control of thinking effort for gemini-2.5-flash models
* test: update unit testing
* feat(vertex_and_google_ai_studio_gemini.py): return reasoning content if given in gemini response
* test: update model name
* fix: fix ruff check
* test(test_spend_management_endpoints.py): update tests to be less sensitive to new keys / updates to usage object
* fix(vertex_and_google_ai_studio_gemini.py): fix translation
This commit is contained in:
parent db4ebe10c8
commit 36308a31be
16 changed files with 453 additions and 88 deletions
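Taken together, the changes above let callers request and inspect Gemini reasoning ("thinking") output through litellm. A minimal usage sketch follows; it is not taken from this diff. It assumes a Gemini API key is set in the environment, the model string is illustrative, and the response fields shown (reasoning_content, completion_tokens_details.reasoning_tokens) follow litellm's OpenAI-compatible response shape.

    # Sketch: exercising the new Gemini reasoning support via litellm.
    # Assumptions (not verbatim from this diff): GEMINI_API_KEY is set and
    # the model name below is a valid gemini-2.5-flash identifier.
    import litellm

    response = litellm.completion(
        model="gemini/gemini-2.5-flash-preview-04-17",  # illustrative model name
        messages=[{"role": "user", "content": "What is 17 * 24?"}],
        reasoning_effort="low",  # newly mapped for gemini by this commit
    )

    # Reasoning content is returned on the message when the model emits it.
    print(response.choices[0].message.reasoning_content)

    # Thinking tokens are counted separately so cost calc can price them.
    print(response.usage.completion_tokens_details.reasoning_tokens)

    # Generic cost calc now includes reasoning-token pricing for gemini-2.5-flash.
    print(litellm.completion_cost(completion_response=response))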
@@ -76,6 +76,11 @@ class BaseLLMChatTest(ABC):
         """Must return the base completion call args"""
         pass
 
+
+    def get_base_completion_call_args_with_reasoning_model(self) -> dict:
+        """Must return the base completion call args with reasoning_effort"""
+        return {}
+
     def test_developer_role_translation(self):
         """
         Test that the developer role is translated correctly for non-OpenAI providers.
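For context, a provider-specific test class opts in to the new reasoning test by overriding this hook with non-empty args. A hypothetical sketch (the class and model names are illustrative, not from this diff):

    # Hypothetical subclass: returning non-empty args from the new hook
    # enables test_reasoning_effort for this provider.
    class TestGoogleAIStudioGemini(BaseLLMChatTest):
        def get_base_completion_call_args(self) -> dict:
            return {"model": "gemini/gemini-2.0-flash"}

        def get_base_completion_call_args_with_reasoning_model(self) -> dict:
            return {"model": "gemini/gemini-2.5-flash-preview-04-17"}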
@@ -1126,6 +1131,46 @@ class BaseLLMChatTest(ABC):
 
         print(response)
 
+    def test_reasoning_effort(self):
+        """Test that reasoning_effort is passed correctly to the model"""
+        from litellm.utils import supports_reasoning
+        from litellm import completion
+
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+
+        base_completion_call_args = self.get_base_completion_call_args_with_reasoning_model()
+        if len(base_completion_call_args) == 0:
+            print("base_completion_call_args is empty")
+            pytest.skip("Model does not support reasoning")
+        if not supports_reasoning(base_completion_call_args["model"], None):
+            print("Model does not support reasoning")
+            pytest.skip("Model does not support reasoning")
+
+        _, provider, _, _ = litellm.get_llm_provider(
+            model=base_completion_call_args["model"]
+        )
+
+        ## CHECK PARAM MAPPING
+        optional_params = get_optional_params(
+            model=base_completion_call_args["model"],
+            custom_llm_provider=provider,
+            reasoning_effort="high",
+        )
+        # either accepts reasoning effort or thinking budget
+        assert "reasoning_effort" in optional_params or "4096" in json.dumps(optional_params)
+
+        try:
+            litellm._turn_on_debug()
+            response = completion(
+                **base_completion_call_args,
+                reasoning_effort="low",
+                messages=[{"role": "user", "content": "Hello!"}],
+            )
+            print(f"response: {response}")
+        except Exception as e:
+            pytest.fail(f"Error: {e}")
+
 
 class BaseOSeriesModelsTest(ABC):  # test across azure/openai
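The param-mapping assertion in this hunk implies that, for providers without a native reasoning_effort parameter, litellm translates the effort level into a Gemini thinking budget, and that "high" maps to a budget containing 4096. A standalone sketch of that check follows; the exact mapped shape (e.g. a thinkingBudget key) is an assumption inferred from the assert, not confirmed by this diff.

    # Standalone sketch of the param-mapping check, runnable outside the
    # test class. Assumption inferred from the assert above: for gemini,
    # reasoning_effort="high" maps to a thinking budget containing 4096,
    # e.g. something like {"thinkingConfig": {"thinkingBudget": 4096}}.
    import json

    from litellm.utils import get_optional_params

    optional_params = get_optional_params(
        model="gemini-2.5-flash-preview-04-17",  # illustrative model name
        custom_llm_provider="gemini",
        reasoning_effort="high",
    )
    print(json.dumps(optional_params, indent=2, default=str))

    # Mirrors the test's assertion: either the provider accepts
    # reasoning_effort natively, or it was mapped to a thinking budget.
    assert "reasoning_effort" in optional_params or "4096" in json.dumps(
        optional_params, default=str
    )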