Mirror of https://github.com/BerriAI/litellm.git
fix(health.md): add rerank model health check information (#7295)
* fix(health.md): add rerank model health check information
* build(model_prices_and_context_window.json): add gemini 2.0 for google ai studio - pricing + commercial rate limits
* build(model_prices_and_context_window.json): add gemini-2.0 supports audio output = true
* docs(team_model_add.md): clarify that allowing teams to add models is an enterprise feature
* fix(o1_transformation.py): add support for the 'n', 'response_format' and 'stop' params for o1, and the 'stream_options' param for o1-mini
* build(model_prices_and_context_window.json): add 'supports_system_message' to the supporting openai models
  needed as the o1-preview and o1-mini models don't support a 'system' message
* fix(o1_transformation.py): translate the system message based on whether the o1 model supports it
* fix(o1_transformation.py): return 'stream' param support for o1-mini/o1-preview
  o1 currently doesn't support streaming, but the other model versions do.
  Fixes https://github.com/BerriAI/litellm/issues/7292
* fix(o1_transformation.py): return tool calling/response_format in supported params if the model map says so
  Fixes https://github.com/BerriAI/litellm/issues/7292
* fix: fix linting errors
* fix: update '_transform_messages'
* fix(o1_transformation.py): fix the provider passed for supported param checks
* test(base_llm_unit_tests.py): skip test if the api takes >5s to respond
* fix(utils.py): return false in 'supports_factory' if the value can't be found
* fix(o1_transformation.py): always return stream + stream_options as supported params + handle stream options being passed in for azure o1
* feat(openai.py): support stream faking natively in the openai handler
  Allows o1 calls to be faked for just the "o1" model, while allowing native streaming for o1-mini and o1-preview.
  Fixes https://github.com/BerriAI/litellm/issues/7292
* fix(openai.py): use the inference param instead of the original optional param
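The supported-params changes above can be illustrated with a short, hedged sketch. It reuses only the helpers that appear in the test diff further down (ProviderConfigManager, LlmProviders, get_supported_openai_params) together with the locally bundled model cost map, so it makes no API calls; the expected True/False results are taken from this commit's description, not independently verified:

    import os
    import litellm
    from litellm.utils import ProviderConfigManager
    from litellm.types.utils import LlmProviders

    # Use the locally bundled model cost map so no network fetch is needed.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    for model in ["o1-preview", "o1-mini", "o1"]:
        # Provider-specific chat config for each o1-family model.
        config = ProviderConfigManager.get_provider_chat_config(
            model=model, provider=LlmProviders.OPENAI
        )
        supported = config.get_supported_openai_params(model=model)
        # Per this commit's description: only "o1" reports tool calling support,
        # while all three report "stream" (streaming is faked internally for "o1").
        print(model, "tools" in supported, "stream" in supported)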
This commit is contained in:
parent e95820367f
commit 1a4910f6c0

34 changed files with 800 additions and 515 deletions
@@ -17,14 +17,19 @@ import litellm
 from litellm import Choices, Message, ModelResponse
 
 
+@pytest.mark.parametrize("model", ["o1-preview", "o1-mini", "o1"])
 @pytest.mark.asyncio
-async def test_o1_handle_system_role():
+async def test_o1_handle_system_role(model):
     """
     Tests that:
     - max_tokens is translated to 'max_completion_tokens'
     - role 'system' is translated to 'user'
     """
     from openai import AsyncOpenAI
+    from litellm.utils import supports_system_messages
 
     os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
     litellm.model_cost = litellm.get_model_cost_map(url="")
 
     litellm.set_verbose = True
@@ -35,9 +40,9 @@ async def test_o1_handle_system_role():
     ) as mock_client:
         try:
             await litellm.acompletion(
-                model="o1-preview",
+                model=model,
                 max_tokens=10,
-                messages=[{"role": "system", "content": "Hello!"}],
+                messages=[{"role": "system", "content": "Be a good bot!"}],
                 client=client,
             )
         except Exception as e:
@@ -48,9 +53,73 @@ async def test_o1_handle_system_role():
 
     print("request_body: ", request_body)
 
-    assert request_body["model"] == "o1-preview"
+    assert request_body["model"] == model
     assert request_body["max_completion_tokens"] == 10
-    assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]
+    if supports_system_messages(model, "openai"):
+        assert request_body["messages"] == [
+            {"role": "system", "content": "Be a good bot!"}
+        ]
+    else:
+        assert request_body["messages"] == [
+            {"role": "user", "content": "Be a good bot!"}
+        ]
+
+
+@pytest.mark.parametrize(
+    "model, expected_tool_calling_support",
+    [("o1-preview", False), ("o1-mini", False), ("o1", True)],
+)
+@pytest.mark.asyncio
+async def test_o1_handle_tool_calling_optional_params(
+    model, expected_tool_calling_support
+):
+    """
+    Tests that:
+    - max_tokens is translated to 'max_completion_tokens'
+    - role 'system' is translated to 'user'
+    """
+    from openai import AsyncOpenAI
+    from litellm.utils import ProviderConfigManager
+    from litellm.types.utils import LlmProviders
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    config = ProviderConfigManager.get_provider_chat_config(
+        model=model, provider=LlmProviders.OPENAI
+    )
+
+    supported_params = config.get_supported_openai_params(model=model)
+
+    assert expected_tool_calling_support == ("tools" in supported_params)
+
+
+# @pytest.mark.parametrize(
+#     "model",
+#     ["o1"],  # "o1-preview", "o1-mini",
+# )
+# @pytest.mark.asyncio
+# async def test_o1_handle_streaming_e2e(model):
+#     """
+#     Tests that:
+#     - max_tokens is translated to 'max_completion_tokens'
+#     - role 'system' is translated to 'user'
+#     """
+#     from openai import AsyncOpenAI
+#     from litellm.utils import ProviderConfigManager
+#     from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
+#     from litellm.types.utils import LlmProviders
+
+#     resp = litellm.completion(
+#         model=model,
+#         messages=[{"role": "user", "content": "Hello!"}],
+#         stream=True,
+#     )
+#     assert isinstance(resp, CustomStreamWrapper)
+#     for chunk in resp:
+#         print("chunk: ", chunk)
+
+#     assert True
 
 
 @pytest.mark.asyncio
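For context on the conditional assertion in test_o1_handle_system_role, here is a minimal, hedged sketch of the model-map check that drives it; it only consults litellm's local model map (same setup as the tests above) and makes no API calls:

    import os
    import litellm
    from litellm.utils import supports_system_messages

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    for model in ["o1-preview", "o1-mini", "o1"]:
        if supports_system_messages(model, "openai"):
            # Model map says system messages are supported: sent through as-is.
            print(f"{model}: 'system' role kept")
        else:
            # Otherwise litellm's o1 transformation rewrites the role to 'user'.
            print(f"{model}: 'system' role translated to 'user'")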