[Feat] Add max_completion_tokens param (#5691)

* add max_completion_tokens

* add max_completion_tokens

* add max_completion_tokens support for OpenAI models

* add max_completion_tokens param

* add max_completion_tokens for bedrock converse models

* add test for converse maxTokens

* fix openai o1 param mapping test

* move test optional params

* add max_completion_tokens for anthropic api

* fix conftest

* add max_completion_tokens for vertex ai partner models

* add max_completion_tokens for fireworks ai

* add max_completion_tokens for hf rest api

* add test for param mapping

* add param mapping for vertex, gemini + testing

* predibase is the most unstable and unusable llm api in prod, can't handle our ci/cd

* add max_completion_tokens to openai supported params

* fix fireworks ai param mapping
Ishaan Jaff 2024-09-14 14:57:01 -07:00 committed by GitHub
parent 415a3ede9e
commit 85acdb9193
31 changed files with 591 additions and 35 deletions
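
For reference, a minimal caller-side sketch of the new parameter (model names and token values are illustrative): litellm accepts max_completion_tokens on completion/acompletion and maps it onto each provider's native limit (max_tokens, maxTokens, max_new_tokens, num_predict, max_output_tokens, or max_tokens_to_sample), as the tests below assert.

import litellm

# OpenAI chat models receive max_completion_tokens unchanged.
response = litellm.completion(
    model="o1-preview",
    messages=[{"role": "user", "content": "Hello!"}],
    max_completion_tokens=10,
)

# Bedrock Converse models receive the same value as inferenceConfig.maxTokens.
response = litellm.completion(
    model="bedrock/mistral.mistral-large-2407-v1:0",
    messages=[{"role": "user", "content": "Hello!"}],
    max_completion_tokens=10,
)
print(response.choices[0].message.content)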


@@ -0,0 +1,54 @@
# conftest.py
import importlib
import os
import sys
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
@pytest.fixture(scope="function", autouse=True)
def setup_and_teardown():
"""
This fixture reloads litellm before every test function, to speed up testing by preventing callbacks from chaining across tests.
"""
curr_dir = os.getcwd() # Get the current working directory
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the project directory to the system path
import litellm
from litellm import Router
importlib.reload(litellm)
import asyncio
loop = asyncio.get_event_loop_policy().new_event_loop()
asyncio.set_event_loop(loop)
print(litellm)
# from litellm import Router, completion, aembedding, acompletion, embedding
yield
# Teardown code (executes after the yield point)
loop.close() # Close the loop created earlier
asyncio.set_event_loop(None) # Remove the reference to the loop
def pytest_collection_modifyitems(config, items):
# Separate tests in 'test_amazing_proxy_custom_logger.py' from the other tests
custom_logger_tests = [
item for item in items if "custom_logger" in item.parent.name
]
other_tests = [item for item in items if "custom_logger" not in item.parent.name]
# Sort tests based on their names
custom_logger_tests.sort(key=lambda x: x.name)
other_tests.sort(key=lambda x: x.name)
# Reorder the items list
items[:] = custom_logger_tests + other_tests


@@ -0,0 +1,32 @@
import os
import sys
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from litellm.llms.fireworks_ai import FireworksAIConfig
fireworks = FireworksAIConfig()
def test_map_openai_params_tool_choice():
# Test case 1: tool_choice is "required"
result = fireworks.map_openai_params({"tool_choice": "required"}, {}, "some_model")
assert result == {"tool_choice": "any"}
# Test case 2: tool_choice is "auto"
result = fireworks.map_openai_params({"tool_choice": "auto"}, {}, "some_model")
assert result == {"tool_choice": "auto"}
# Test case 3: tool_choice is not present
result = fireworks.map_openai_params(
{"some_other_param": "value"}, {}, "some_model"
)
assert result == {}
# Test case 4: tool_choice is None
result = fireworks.map_openai_params({"tool_choice": None}, {}, "some_model")
assert result == {"tool_choice": None}


@@ -0,0 +1,342 @@
import json
import os
import sys
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from datetime import datetime
from unittest.mock import AsyncMock
from dotenv import load_dotenv
load_dotenv()
import httpx
import pytest
from respx import MockRouter
import litellm
from litellm import Choices, Message, ModelResponse
# Adds the parent directory to the system path
def return_mocked_response(model: str):
if model == "bedrock/mistral.mistral-large-2407-v1:0":
return {
"metrics": {"latencyMs": 316},
"output": {
"message": {
"content": [{"text": "Hello! How are you doing today? How can"}],
"role": "assistant",
}
},
"stopReason": "max_tokens",
"usage": {"inputTokens": 5, "outputTokens": 10, "totalTokens": 15},
}
@pytest.mark.parametrize(
"model",
[
"bedrock/mistral.mistral-large-2407-v1:0",
],
)
@pytest.mark.respx
@pytest.mark.asyncio()
async def test_bedrock_max_completion_tokens(model: str, respx_mock: MockRouter):
"""
Tests that:
- max_completion_tokens is passed as max_tokens to bedrock models
"""
litellm.set_verbose = True
mock_response = return_mocked_response(model)
_model = model.split("/")[1]
print("\n\nmock_response: ", mock_response)
url = f"https://bedrock-runtime.us-west-2.amazonaws.com/model/{_model}/converse"
mock_request = respx_mock.post(url).mock(
return_value=httpx.Response(200, json=mock_response)
)
response = await litellm.acompletion(
model=model,
max_completion_tokens=10,
messages=[{"role": "user", "content": "Hello!"}],
)
assert mock_request.called
request_body = json.loads(mock_request.calls[0].request.content)
print("request_body: ", request_body)
assert request_body == {
"messages": [{"role": "user", "content": [{"text": "Hello!"}]}],
"additionalModelRequestFields": {},
"system": [],
"inferenceConfig": {"maxTokens": 10},
}
print(f"response: {response}")
assert isinstance(response, ModelResponse)
@pytest.mark.parametrize(
"model",
["anthropic/claude-3-sonnet-20240229", "anthropic/claude-3-opus-20240229,"],
)
@pytest.mark.respx
@pytest.mark.asyncio()
async def test_anthropic_api_max_completion_tokens(model: str, respx_mock: MockRouter):
"""
Tests that:
- max_completion_tokens is passed as max_tokens to anthropic models
"""
litellm.set_verbose = True
mock_response = {
"content": [{"text": "Hi! My name is Claude.", "type": "text"}],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-5-sonnet-20240620",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": None,
"type": "message",
"usage": {"input_tokens": 2095, "output_tokens": 503},
}
print("\n\nmock_response: ", mock_response)
url = f"https://api.anthropic.com/v1/messages"
mock_request = respx_mock.post(url).mock(
return_value=httpx.Response(200, json=mock_response)
)
response = await litellm.acompletion(
model=model,
max_completion_tokens=10,
messages=[{"role": "user", "content": "Hello!"}],
)
assert mock_request.called
request_body = json.loads(mock_request.calls[0].request.content)
print("request_body: ", request_body)
assert request_body == {
"messages": [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}],
"max_tokens": 10,
"model": model.split("/")[-1],
}
print(f"response: {response}")
assert isinstance(response, ModelResponse)
def test_all_model_configs():
from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.ai21.transformation import (
VertexAIAi21Config,
)
from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.llama3.transformation import (
VertexAILlama3Config,
)
assert (
"max_completion_tokens" in VertexAILlama3Config().get_supported_openai_params()
)
assert VertexAILlama3Config().map_openai_params(
{"max_completion_tokens": 10}, {}, "llama3"
) == {"max_tokens": 10}
assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params()
assert VertexAIAi21Config().map_openai_params(
{"max_completion_tokens": 10}, {}, "llama3"
) == {"max_tokens": 10}
from litellm.llms.fireworks_ai import FireworksAIConfig
assert "max_completion_tokens" in FireworksAIConfig().get_supported_openai_params()
assert FireworksAIConfig().map_openai_params(
{"max_completion_tokens": 10}, {}, "llama3"
) == {"max_tokens": 10}
from litellm.llms.huggingface_restapi import HuggingfaceConfig
assert "max_completion_tokens" in HuggingfaceConfig().get_supported_openai_params()
assert HuggingfaceConfig().map_openai_params({"max_completion_tokens": 10}, {}) == {
"max_new_tokens": 10
}
from litellm.llms.nvidia_nim import NvidiaNimConfig
assert "max_completion_tokens" in NvidiaNimConfig().get_supported_openai_params(
model="llama3"
)
assert NvidiaNimConfig().map_openai_params(
model="llama3",
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"max_tokens": 10}
from litellm.llms.ollama_chat import OllamaChatConfig
assert "max_completion_tokens" in OllamaChatConfig().get_supported_openai_params()
assert OllamaChatConfig().map_openai_params(
model="llama3",
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"num_predict": 10}
from litellm.llms.predibase import PredibaseConfig
assert "max_completion_tokens" in PredibaseConfig().get_supported_openai_params()
assert PredibaseConfig().map_openai_params(
{"max_completion_tokens": 10},
{},
) == {"max_new_tokens": 10}
from litellm.llms.text_completion_codestral import MistralTextCompletionConfig
assert (
"max_completion_tokens"
in MistralTextCompletionConfig().get_supported_openai_params()
)
assert MistralTextCompletionConfig().map_openai_params(
{"max_completion_tokens": 10},
{},
) == {"max_tokens": 10}
from litellm.llms.volcengine import VolcEngineConfig
assert "max_completion_tokens" in VolcEngineConfig().get_supported_openai_params(
model="llama3"
)
assert VolcEngineConfig().map_openai_params(
model="llama3",
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"max_tokens": 10}
from litellm.llms.AI21.chat import AI21ChatConfig
assert "max_completion_tokens" in AI21ChatConfig().get_supported_openai_params(
"jamba-1.5-mini@001"
)
assert AI21ChatConfig().map_openai_params(
model="jamba-1.5-mini@001",
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"max_tokens": 10}
from litellm.llms.AzureOpenAI.azure import AzureOpenAIConfig
assert "max_completion_tokens" in AzureOpenAIConfig().get_supported_openai_params()
assert AzureOpenAIConfig().map_openai_params(
model="gpt-3.5-turbo",
non_default_params={"max_completion_tokens": 10},
optional_params={},
api_version="2022-12-01",
drop_params=False,
) == {"max_tokens": 10}
from litellm.llms.bedrock.chat import AmazonConverseConfig
assert (
"max_completion_tokens"
in AmazonConverseConfig().get_supported_openai_params(
model="anthropic.claude-3-sonnet-20240229-v1:0"
)
)
assert AmazonConverseConfig().map_openai_params(
model="anthropic.claude-3-sonnet-20240229-v1:0",
non_default_params={"max_completion_tokens": 10},
optional_params={},
drop_params=False,
) == {"maxTokens": 10}
from litellm.llms.text_completion_codestral import MistralTextCompletionConfig
assert (
"max_completion_tokens"
in MistralTextCompletionConfig().get_supported_openai_params()
)
assert MistralTextCompletionConfig().map_openai_params(
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"max_tokens": 10}
from litellm.llms.bedrock.common_utils import (
AmazonAnthropicClaude3Config,
AmazonAnthropicConfig,
)
assert (
"max_completion_tokens"
in AmazonAnthropicClaude3Config().get_supported_openai_params()
)
assert AmazonAnthropicClaude3Config().map_openai_params(
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"max_tokens": 10}
assert (
"max_completion_tokens" in AmazonAnthropicConfig().get_supported_openai_params()
)
assert AmazonAnthropicConfig().map_openai_params(
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"max_tokens_to_sample": 10}
from litellm.llms.databricks.chat import DatabricksConfig
assert "max_completion_tokens" in DatabricksConfig().get_supported_openai_params()
assert DatabricksConfig().map_openai_params(
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"max_tokens": 10}
from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_anthropic import (
VertexAIAnthropicConfig,
)
assert (
"max_completion_tokens"
in VertexAIAnthropicConfig().get_supported_openai_params()
)
assert VertexAIAnthropicConfig().map_openai_params(
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"max_tokens": 10}
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
VertexAIConfig,
GoogleAIStudioGeminiConfig,
VertexGeminiConfig,
)
assert "max_completion_tokens" in VertexAIConfig().get_supported_openai_params()
assert VertexAIConfig().map_openai_params(
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"max_output_tokens": 10}
assert (
"max_completion_tokens"
in GoogleAIStudioGeminiConfig().get_supported_openai_params()
)
assert GoogleAIStudioGeminiConfig().map_openai_params(
model="gemini-1.0-pro",
non_default_params={"max_completion_tokens": 10},
optional_params={},
) == {"max_output_tokens": 10}
assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params()
assert VertexGeminiConfig().map_openai_params(
model="gemini-1.0-pro",
non_default_params={"max_completion_tokens": 10},
optional_params={},
drop_params=False,
) == {"max_output_tokens": 10}


@@ -0,0 +1,101 @@
import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import httpx
import pytest
from respx import MockRouter
import litellm
from litellm import Choices, Message, ModelResponse
@pytest.mark.asyncio
@pytest.mark.respx
async def test_o1_handle_system_role(respx_mock: MockRouter):
"""
Tests that:
- max_tokens is translated to 'max_completion_tokens'
- role 'system' is translated to 'user'
"""
litellm.set_verbose = True
mock_response = ModelResponse(
id="cmpl-mock",
choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
created=int(datetime.now().timestamp()),
model="o1-preview",
)
mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
return_value=httpx.Response(200, json=mock_response.dict())
)
response = await litellm.acompletion(
model="o1-preview",
max_tokens=10,
messages=[{"role": "system", "content": "Hello!"}],
)
assert mock_request.called
request_body = json.loads(mock_request.calls[0].request.content)
print("request_body: ", request_body)
assert request_body == {
"model": "o1-preview",
"max_completion_tokens": 10,
"messages": [{"role": "user", "content": "Hello!"}],
}
print(f"response: {response}")
assert isinstance(response, ModelResponse)
@pytest.mark.asyncio
@pytest.mark.respx
@pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"])
async def test_o1_max_completion_tokens(respx_mock: MockRouter, model: str):
"""
Tests that:
- max_completion_tokens is passed directly to OpenAI chat completion models
"""
litellm.set_verbose = True
mock_response = ModelResponse(
id="cmpl-mock",
choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
created=int(datetime.now().timestamp()),
model=model,
)
mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
return_value=httpx.Response(200, json=mock_response.dict())
)
response = await litellm.acompletion(
model=model,
max_completion_tokens=10,
messages=[{"role": "user", "content": "Hello!"}],
)
assert mock_request.called
request_body = json.loads(mock_request.calls[0].request.content)
print("request_body: ", request_body)
assert request_body == {
"model": model,
"max_completion_tokens": 10,
"messages": [{"role": "user", "content": "Hello!"}],
}
print(f"response: {response}")
assert isinstance(response, ModelResponse)


@@ -0,0 +1,602 @@
#### What this tests ####
# This tests if get_optional_params works as expected
import asyncio
import inspect
import os
import sys
import time
import traceback
import pytest
sys.path.insert(0, os.path.abspath("../.."))
from unittest.mock import MagicMock, patch
import litellm
from litellm.llms.prompt_templates.factory import map_system_message_pt
from litellm.types.completion import (
ChatCompletionMessageParam,
ChatCompletionSystemMessageParam,
ChatCompletionUserMessageParam,
)
from litellm.utils import (
get_optional_params,
get_optional_params_embeddings,
get_optional_params_image_gen,
)
## get_optional_params_embeddings
### Models: OpenAI, Azure, Bedrock
### Scenarios: w/ optional params + litellm.drop_params = True
def test_supports_system_message():
"""
Check that litellm.completion(..., supports_system_message=False) converts the system message into user content
"""
messages = [
ChatCompletionSystemMessageParam(role="system", content="Listen here!"),
ChatCompletionUserMessageParam(role="user", content="Hello there!"),
]
new_messages = map_system_message_pt(messages=messages)
assert len(new_messages) == 1
assert new_messages[0]["role"] == "user"
## confirm you can make a openai call with this param
response = litellm.completion(
model="gpt-3.5-turbo", messages=new_messages, supports_system_message=False
)
assert isinstance(response, litellm.ModelResponse)
@pytest.mark.parametrize(
"stop_sequence, expected_count", [("\n", 0), (["\n"], 0), (["finish_reason"], 1)]
)
def test_anthropic_optional_params(stop_sequence, expected_count):
"""
Test that a whitespace-only stop sequence is dropped from Anthropic optional params
"""
litellm.drop_params = True
optional_params = get_optional_params(
model="claude-3", custom_llm_provider="anthropic", stop=stop_sequence
)
assert len(optional_params) == expected_count
def test_bedrock_optional_params_embeddings():
litellm.drop_params = True
optional_params = get_optional_params_embeddings(
model="", user="John", encoding_format=None, custom_llm_provider="bedrock"
)
assert len(optional_params) == 0
@pytest.mark.parametrize(
"model, expected_dimensions, dimensions_kwarg",
[
("bedrock/amazon.titan-embed-text-v1", False, None),
("bedrock/amazon.titan-embed-image-v1", True, "embeddingConfig"),
("bedrock/amazon.titan-embed-text-v2:0", True, "dimensions"),
("bedrock/cohere.embed-multilingual-v3", False, None),
],
)
def test_bedrock_optional_params_embeddings_dimension(
model, expected_dimensions, dimensions_kwarg
):
litellm.drop_params = True
optional_params = get_optional_params_embeddings(
model=model,
user="John",
encoding_format=None,
dimensions=20,
custom_llm_provider="bedrock",
)
if expected_dimensions:
assert len(optional_params) == 1
else:
assert len(optional_params) == 0
if dimensions_kwarg is not None:
assert dimensions_kwarg in optional_params
def test_google_ai_studio_optional_params_embeddings():
optional_params = get_optional_params_embeddings(
model="",
user="John",
encoding_format=None,
custom_llm_provider="gemini",
drop_params=True,
)
assert len(optional_params) == 0
def test_openai_optional_params_embeddings():
litellm.drop_params = True
optional_params = get_optional_params_embeddings(
model="", user="John", encoding_format=None, custom_llm_provider="openai"
)
assert len(optional_params) == 1
assert optional_params["user"] == "John"
def test_azure_optional_params_embeddings():
litellm.drop_params = True
optional_params = get_optional_params_embeddings(
model="chatgpt-v-2",
user="John",
encoding_format=None,
custom_llm_provider="azure",
)
assert len(optional_params) == 1
assert optional_params["user"] == "John"
def test_databricks_optional_params():
litellm.drop_params = True
optional_params = get_optional_params(
model="",
user="John",
custom_llm_provider="databricks",
max_tokens=10,
temperature=0.2,
)
print(f"optional_params: {optional_params}")
assert len(optional_params) == 2
assert "user" not in optional_params
def test_gemini_optional_params():
litellm.drop_params = True
optional_params = get_optional_params(
model="",
custom_llm_provider="gemini",
max_tokens=10,
frequency_penalty=10,
)
print(f"optional_params: {optional_params}")
assert len(optional_params) == 1
assert "frequency_penalty" not in optional_params
def test_azure_ai_mistral_optional_params():
litellm.drop_params = True
optional_params = get_optional_params(
model="mistral-large-latest",
user="John",
custom_llm_provider="openai",
max_tokens=10,
temperature=0.2,
)
assert "user" not in optional_params
def test_vertex_ai_llama_3_optional_params():
litellm.vertex_llama3_models = ["meta/llama3-405b-instruct-maas"]
litellm.drop_params = True
optional_params = get_optional_params(
model="meta/llama3-405b-instruct-maas",
user="John",
custom_llm_provider="vertex_ai",
max_tokens=10,
temperature=0.2,
)
assert "user" not in optional_params
def test_vertex_ai_mistral_optional_params():
litellm.vertex_mistral_models = ["mistral-large@2407"]
litellm.drop_params = True
optional_params = get_optional_params(
model="mistral-large@2407",
user="John",
custom_llm_provider="vertex_ai",
max_tokens=10,
temperature=0.2,
)
assert "user" not in optional_params
assert "max_tokens" in optional_params
assert "temperature" in optional_params
def test_azure_gpt_optional_params_gpt_vision():
# For OpenAI and Azure, all extra params need to be passed as extra_body to the OpenAI Python SDK. We assert that extra_body is actually set here.
optional_params = litellm.utils.get_optional_params(
model="",
user="John",
custom_llm_provider="azure",
max_tokens=10,
temperature=0.2,
enhancements={"ocr": {"enabled": True}, "grounding": {"enabled": True}},
dataSources=[
{
"type": "AzureComputerVision",
"parameters": {
"endpoint": "<your_computer_vision_endpoint>",
"key": "<your_computer_vision_key>",
},
}
],
)
print(optional_params)
assert optional_params["max_tokens"] == 10
assert optional_params["temperature"] == 0.2
assert optional_params["extra_body"] == {
"enhancements": {"ocr": {"enabled": True}, "grounding": {"enabled": True}},
"dataSources": [
{
"type": "AzureComputerVision",
"parameters": {
"endpoint": "<your_computer_vision_endpoint>",
"key": "<your_computer_vision_key>",
},
}
],
}
# test_azure_gpt_optional_params_gpt_vision()
def test_azure_gpt_optional_params_gpt_vision_with_extra_body():
# if the user passes extra_body, we should not overwrite it; we should pass it along to the OpenAI Python SDK
optional_params = litellm.utils.get_optional_params(
model="",
user="John",
custom_llm_provider="azure",
max_tokens=10,
temperature=0.2,
extra_body={
"meta": "hi",
},
enhancements={"ocr": {"enabled": True}, "grounding": {"enabled": True}},
dataSources=[
{
"type": "AzureComputerVision",
"parameters": {
"endpoint": "<your_computer_vision_endpoint>",
"key": "<your_computer_vision_key>",
},
}
],
)
print(optional_params)
assert optional_params["max_tokens"] == 10
assert optional_params["temperature"] == 0.2
assert optional_params["extra_body"] == {
"enhancements": {"ocr": {"enabled": True}, "grounding": {"enabled": True}},
"dataSources": [
{
"type": "AzureComputerVision",
"parameters": {
"endpoint": "<your_computer_vision_endpoint>",
"key": "<your_computer_vision_key>",
},
}
],
"meta": "hi",
}
# test_azure_gpt_optional_params_gpt_vision_with_extra_body()
def test_openai_extra_headers():
optional_params = litellm.utils.get_optional_params(
model="",
user="John",
custom_llm_provider="openai",
max_tokens=10,
temperature=0.2,
extra_headers={"AI-Resource Group": "ishaan-resource"},
)
print(optional_params)
assert optional_params["max_tokens"] == 10
assert optional_params["temperature"] == 0.2
assert optional_params["extra_headers"] == {"AI-Resource Group": "ishaan-resource"}
@pytest.mark.parametrize(
"api_version",
[
"2024-02-01",
"2024-07-01", # potential future version with tool_choice="required" supported
"2023-07-01-preview",
"2024-03-01-preview",
],
)
def test_azure_tool_choice(api_version):
"""
Test azure tool choice on older + new version
"""
litellm.drop_params = True
optional_params = litellm.utils.get_optional_params(
model="chatgpt-v-2",
user="John",
custom_llm_provider="azure",
max_tokens=10,
temperature=0.2,
extra_headers={"AI-Resource Group": "ishaan-resource"},
tool_choice="required",
api_version=api_version,
)
print(f"{optional_params}")
if api_version == "2024-07-01":
assert optional_params["tool_choice"] == "required"
else:
assert (
"tool_choice" not in optional_params
), "tool choice should not be present. Got - tool_choice={} for api version={}".format(
optional_params["tool_choice"], api_version
)
@pytest.mark.parametrize("drop_params", [True, False, None])
def test_dynamic_drop_params(drop_params):
"""
Make a call to Cohere with drop_params=True vs. False.
"""
if drop_params is True:
optional_params = litellm.utils.get_optional_params(
model="command-r",
custom_llm_provider="cohere",
response_format={"type": "json"},
drop_params=drop_params,
)
else:
try:
optional_params = litellm.utils.get_optional_params(
model="command-r",
custom_llm_provider="cohere",
response_format={"type": "json"},
drop_params=drop_params,
)
pytest.fail("Expected to fail")
except Exception as e:
pass
def test_dynamic_drop_params_e2e():
with patch("requests.post", new=MagicMock()) as mock_response:
try:
response = litellm.completion(
model="command-r",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
response_format={"key": "value"},
drop_params=True,
)
except Exception as e:
pass
mock_response.assert_called_once()
print(mock_response.call_args.kwargs["data"])
assert "response_format" not in mock_response.call_args.kwargs["data"]
@pytest.mark.parametrize(
"model, provider, should_drop",
[("command-r", "cohere", True), ("gpt-3.5-turbo", "openai", False)],
)
def test_drop_params_parallel_tool_calls(model, provider, should_drop):
"""
https://github.com/BerriAI/litellm/issues/4584
"""
response = litellm.utils.get_optional_params(
model=model,
custom_llm_provider=provider,
response_format={"type": "json"},
parallel_tool_calls=True,
drop_params=True,
)
print(response)
if should_drop:
assert "response_format" not in response
assert "parallel_tool_calls" not in response
else:
assert "response_format" in response
assert "parallel_tool_calls" in response
def test_dynamic_drop_params_parallel_tool_calls():
"""
https://github.com/BerriAI/litellm/issues/4584
"""
with patch("requests.post", new=MagicMock()) as mock_response:
try:
response = litellm.completion(
model="command-r",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
parallel_tool_calls=True,
drop_params=True,
)
except Exception as e:
pass
mock_response.assert_called_once()
print(mock_response.call_args.kwargs["data"])
assert "parallel_tool_calls" not in mock_response.call_args.kwargs["data"]
@pytest.mark.parametrize("drop_params", [True, False, None])
def test_dynamic_drop_additional_params(drop_params):
"""
Make a call to cohere, dropping 'response_format' specifically
"""
if drop_params is True:
optional_params = litellm.utils.get_optional_params(
model="command-r",
custom_llm_provider="cohere",
response_format={"type": "json"},
additional_drop_params=["response_format"],
)
else:
try:
optional_params = litellm.utils.get_optional_params(
model="command-r",
custom_llm_provider="cohere",
response_format={"type": "json"},
)
pytest.fail("Expected to fail")
except Exception as e:
pass
def test_dynamic_drop_additional_params_e2e():
with patch("requests.post", new=MagicMock()) as mock_response:
try:
response = litellm.completion(
model="command-r",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
response_format={"key": "value"},
additional_drop_params=["response_format"],
)
except Exception as e:
pass
mock_response.assert_called_once()
print(mock_response.call_args.kwargs["data"])
assert "response_format" not in mock_response.call_args.kwargs["data"]
assert "additional_drop_params" not in mock_response.call_args.kwargs["data"]
def test_get_optional_params_image_gen():
response = litellm.utils.get_optional_params_image_gen(
aws_region_name="us-east-1", custom_llm_provider="openai"
)
print(response)
assert "aws_region_name" not in response
response = litellm.utils.get_optional_params_image_gen(
aws_region_name="us-east-1", custom_llm_provider="bedrock"
)
print(response)
assert "aws_region_name" in response
def test_bedrock_optional_params_embeddings_provider_specific_params():
optional_params = get_optional_params_embeddings(
model="my-custom-model",
custom_llm_provider="huggingface",
wait_for_model=True,
)
assert len(optional_params) == 1
def test_get_optional_params_num_retries():
"""
Relevant issue - https://github.com/BerriAI/litellm/issues/5124
"""
with patch("litellm.main.get_optional_params", new=MagicMock()) as mock_client:
_ = litellm.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hello world"}],
num_retries=10,
)
mock_client.assert_called()
print(f"mock_client.call_args: {mock_client.call_args}")
assert mock_client.call_args.kwargs["max_retries"] == 10
@pytest.mark.parametrize(
"provider",
[
"vertex_ai",
"vertex_ai_beta",
],
)
def test_vertex_safety_settings(provider):
litellm.vertex_ai_safety_settings = [
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_NONE",
},
]
optional_params = get_optional_params(
model="gemini-1.5-pro", custom_llm_provider=provider
)
assert len(optional_params) == 1
@pytest.mark.parametrize(
"model, provider, expectedAddProp",
[("gemini-1.5-pro", "vertex_ai_beta", False), ("gpt-3.5-turbo", "openai", True)],
)
def test_parse_additional_properties_json_schema(model, provider, expectedAddProp):
optional_params = get_optional_params(
model=model,
custom_llm_provider=provider,
response_format={
"type": "json_schema",
"json_schema": {
"name": "math_reasoning",
"schema": {
"type": "object",
"properties": {
"steps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"explanation": {"type": "string"},
"output": {"type": "string"},
},
"required": ["explanation", "output"],
"additionalProperties": False,
},
},
"final_answer": {"type": "string"},
},
"required": ["steps", "final_answer"],
"additionalProperties": False,
},
"strict": True,
},
},
)
print(optional_params)
if provider == "vertex_ai_beta":
schema = optional_params["response_schema"]
elif provider == "openai":
schema = optional_params["response_format"]["json_schema"]["schema"]
assert ("additionalProperties" in schema) == expectedAddProp
def test_o1_model_params():
optional_params = get_optional_params(
model="o1-preview-2024-09-12",
custom_llm_provider="openai",
seed=10,
user="John",
)
assert optional_params["seed"] == 10
assert optional_params["user"] == "John"