fix(health.md): add rerank model health check information (#7295)

* fix(health.md): add rerank model health check information

* build(model_prices_and_context_window.json): add gemini 2.0 for google ai studio - pricing + commercial rate limits

* build(model_prices_and_context_window.json): add gemini-2.0 supports audio output = true

* docs(team_model_add.md): clarify allowing teams to add models is an enterprise feature

* fix(o1_transformation.py): add support for 'n', 'response_format' and 'stop' params for o1 and 'stream_options' param for o1-mini

* build(model_prices_and_context_window.json): add 'supports_system_message' to supporting openai models

Needed as the o1-preview and o1-mini models don't support the 'system' message role.
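
As a rough illustration of how the new flag is meant to be consumed (the supports_system_messages(model, provider) helper is imported from litellm.utils in the test diff below; the translate_system_message wrapper here is hypothetical, not the actual transformation code):

from litellm.utils import supports_system_messages

def translate_system_message(model: str, messages: list) -> list:
    # Hypothetical helper, for illustration only.
    if supports_system_messages(model, "openai"):
        return messages  # e.g. "o1" keeps the system message as-is
    # e.g. o1-preview / o1-mini: downgrade 'system' to 'user'
    return [
        {**m, "role": "user"} if m.get("role") == "system" else m
        for m in messages
    ]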

* fix(o1_transformation.py): translate system message based on if o1 model supports it

* fix(o1_transformation.py): return 'stream' param support if o1-mini/o1-preview

o1 currently doesn't support streaming, but the other model versions do

Fixes https://github.com/BerriAI/litellm/issues/7292
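
A quick way to sanity-check this from the Python SDK (a sketch using litellm's public get_supported_openai_params helper; the exact output depends on the local model map):

from litellm import get_supported_openai_params

for m in ["o1", "o1-mini", "o1-preview"]:
    params = get_supported_openai_params(model=m, custom_llm_provider="openai")
    # o1-mini / o1-preview stream natively; "o1" streaming is faked (see the stream-faking change below)
    print(m, "stream" in params)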

* fix(o1_transformation.py): return tool calling/response_format in supported params if model map says so

Fixes https://github.com/BerriAI/litellm/issues/7292
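
The new test in the diff below exercises this through the provider config; a condensed sketch of the same check:

from litellm.utils import ProviderConfigManager
from litellm.types.utils import LlmProviders

for m, expects_tools in [("o1", True), ("o1-mini", False), ("o1-preview", False)]:
    config = ProviderConfigManager.get_provider_chat_config(
        model=m, provider=LlmProviders.OPENAI
    )
    supported = config.get_supported_openai_params(model=m)
    assert expects_tools == ("tools" in supported)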

* fix: fix linting errors

* fix: update '_transform_messages'

* fix(o1_transformation.py): fix provider passed for supported param checks

* test(base_llm_unit_tests.py): skip test if api takes >5s to respond

* fix(utils.py): return false in 'supports_factory' if can't find value

* fix(o1_transformation.py): always return stream + stream_options as supported params + handle stream options being passed in for azure o1

* feat(openai.py): support stream faking natively in openai handler

Allows streaming to be faked for just the "o1" model, while allowing native streaming for o1-mini and o1-preview.

Fixes https://github.com/BerriAI/litellm/issues/7292
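
From the caller's side the faked stream looks the same as a native one; a sketch mirroring the (commented-out) e2e streaming test in the diff below, assuming a valid OPENAI_API_KEY:

import litellm
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper

resp = litellm.completion(
    model="o1",  # upstream call is non-streaming; litellm fakes the stream
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
)
assert isinstance(resp, CustomStreamWrapper)
for chunk in resp:
    print("chunk: ", chunk)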

* fix(openai.py): use inference param instead of original optional param
Krish Dholakia authored on 2024-12-18 19:18:10 -08:00, committed by GitHub
parent e95820367f
commit 1a4910f6c0
34 changed files with 800 additions and 515 deletions


@@ -17,14 +17,19 @@ import litellm
 from litellm import Choices, Message, ModelResponse
 
 
+@pytest.mark.parametrize("model", ["o1-preview", "o1-mini", "o1"])
 @pytest.mark.asyncio
-async def test_o1_handle_system_role():
+async def test_o1_handle_system_role(model):
     """
     Tests that:
     - max_tokens is translated to 'max_completion_tokens'
     - role 'system' is translated to 'user'
     """
     from openai import AsyncOpenAI
+    from litellm.utils import supports_system_messages
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
 
     litellm.set_verbose = True
@@ -35,9 +40,9 @@ async def test_o1_handle_system_role():
     ) as mock_client:
         try:
             await litellm.acompletion(
-                model="o1-preview",
+                model=model,
                 max_tokens=10,
-                messages=[{"role": "system", "content": "Hello!"}],
+                messages=[{"role": "system", "content": "Be a good bot!"}],
                 client=client,
             )
         except Exception as e:
@@ -48,9 +53,73 @@ async def test_o1_handle_system_role():
         print("request_body: ", request_body)
 
-        assert request_body["model"] == "o1-preview"
+        assert request_body["model"] == model
         assert request_body["max_completion_tokens"] == 10
-        assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]
+        if supports_system_messages(model, "openai"):
+            assert request_body["messages"] == [
+                {"role": "system", "content": "Be a good bot!"}
+            ]
+        else:
+            assert request_body["messages"] == [
+                {"role": "user", "content": "Be a good bot!"}
+            ]
+
+
+@pytest.mark.parametrize(
+    "model, expected_tool_calling_support",
+    [("o1-preview", False), ("o1-mini", False), ("o1", True)],
+)
+@pytest.mark.asyncio
+async def test_o1_handle_tool_calling_optional_params(
+    model, expected_tool_calling_support
+):
+    """
+    Tests that:
+    - 'tools' is returned as a supported param only when the model map
+      says the model supports function calling
+    """
+    from openai import AsyncOpenAI
+    from litellm.utils import ProviderConfigManager
+    from litellm.types.utils import LlmProviders
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    config = ProviderConfigManager.get_provider_chat_config(
+        model=model, provider=LlmProviders.OPENAI
+    )
+
+    supported_params = config.get_supported_openai_params(model=model)
+
+    assert expected_tool_calling_support == ("tools" in supported_params)
+
+
+# @pytest.mark.parametrize(
+#     "model",
+#     ["o1"],  # "o1-preview", "o1-mini",
+# )
+# @pytest.mark.asyncio
+# async def test_o1_handle_streaming_e2e(model):
+#     """
+#     Tests that streaming works end-to-end for the given o1 model.
+#     """
+#     from openai import AsyncOpenAI
+#     from litellm.utils import ProviderConfigManager
+#     from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
+#     from litellm.types.utils import LlmProviders
+
+#     resp = litellm.completion(
+#         model=model,
+#         messages=[{"role": "user", "content": "Hello!"}],
+#         stream=True,
+#     )
+#     assert isinstance(resp, CustomStreamWrapper)
+
+#     for chunk in resp:
+#         print("chunk: ", chunk)
+
+#     assert True
 
 
 @pytest.mark.asyncio