forked from phoenix/litellm-mirror
[Feat] Add max_completion_tokens param (#5691)

* add max_completion_tokens
* add max_completion_tokens
* add max_completion_tokens support for OpenAI models
* add max_completion_tokens param
* add max_completion_tokens for bedrock converse models
* add test for converse maxTokens
* fix openai o1 param mapping test
* move test optional params
* add max_completion_tokens for anthropic api
* fix conftest
* add max_completion tokens for vertex ai partner models
* add max_completion_tokens for fireworks ai
* add max_completion_tokens for hf rest api
* add test for param mapping
* add param mapping for vertex, gemini + testing
* predibase is the most unstable and unusable llm api in prod, can't handle our ci/cd
* add max_completion_tokens to openai supported params
* fix fireworks ai param mapping
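A minimal caller-side sketch of the new parameter (illustrative only; the model name is an assumption, and the provider-specific translations are what the tests below assert):

import litellm

# max_completion_tokens is accepted like any other OpenAI-style parameter.
# For providers that do not take it natively, litellm maps it to the
# provider-specific field (e.g. max_tokens, maxTokens, max_new_tokens,
# num_predict, max_output_tokens), as exercised in test_max_completion_tokens.py.
response = litellm.completion(
    model="gpt-3.5-turbo",  # assumed model; any supported provider works
    messages=[{"role": "user", "content": "Hello!"}],
    max_completion_tokens=10,
)
print(response)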
This commit is contained in:
parent 415a3ede9e
commit 85acdb9193

31 changed files with 591 additions and 35 deletions
tests/llm_translation/conftest.py (54 additions, Normal file)
@@ -0,0 +1,54 @@
# conftest.py

import importlib
import os
import sys

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm


@pytest.fixture(scope="function", autouse=True)
def setup_and_teardown():
    """
    This fixture reloads litellm before every function. To speed up testing by removing callbacks being chained.
    """
    curr_dir = os.getcwd()  # Get the current working directory
    sys.path.insert(
        0, os.path.abspath("../..")
    )  # Adds the project directory to the system path

    import litellm
    from litellm import Router

    importlib.reload(litellm)
    import asyncio

    loop = asyncio.get_event_loop_policy().new_event_loop()
    asyncio.set_event_loop(loop)
    print(litellm)
    # from litellm import Router, completion, aembedding, acompletion, embedding
    yield

    # Teardown code (executes after the yield point)
    loop.close()  # Close the loop created earlier
    asyncio.set_event_loop(None)  # Remove the reference to the loop


def pytest_collection_modifyitems(config, items):
    # Separate tests in 'test_amazing_proxy_custom_logger.py' and other tests
    custom_logger_tests = [
        item for item in items if "custom_logger" in item.parent.name
    ]
    other_tests = [item for item in items if "custom_logger" not in item.parent.name]

    # Sort tests based on their names
    custom_logger_tests.sort(key=lambda x: x.name)
    other_tests.sort(key=lambda x: x.name)

    # Reorder the items list
    items[:] = custom_logger_tests + other_tests
tests/llm_translation/test_fireworks_ai_translation.py (32 additions, Normal file)
@@ -0,0 +1,32 @@
import os
import sys

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

from litellm.llms.fireworks_ai import FireworksAIConfig

fireworks = FireworksAIConfig()


def test_map_openai_params_tool_choice():
    # Test case 1: tool_choice is "required"
    result = fireworks.map_openai_params({"tool_choice": "required"}, {}, "some_model")
    assert result == {"tool_choice": "any"}

    # Test case 2: tool_choice is "auto"
    result = fireworks.map_openai_params({"tool_choice": "auto"}, {}, "some_model")
    assert result == {"tool_choice": "auto"}

    # Test case 3: tool_choice is not present
    result = fireworks.map_openai_params(
        {"some_other_param": "value"}, {}, "some_model"
    )
    assert result == {}

    # Test case 4: tool_choice is None
    result = fireworks.map_openai_params({"tool_choice": None}, {}, "some_model")
    assert result == {"tool_choice": None}
tests/llm_translation/test_max_completion_tokens.py (342 additions, Normal file)
@@ -0,0 +1,342 @@
import json
import os
import sys

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from datetime import datetime
from unittest.mock import AsyncMock
from dotenv import load_dotenv

load_dotenv()
import httpx
import pytest
from respx import MockRouter

import litellm
from litellm import Choices, Message, ModelResponse

# Adds the parent directory to the system path


def return_mocked_response(model: str):
    if model == "bedrock/mistral.mistral-large-2407-v1:0":
        return {
            "metrics": {"latencyMs": 316},
            "output": {
                "message": {
                    "content": [{"text": "Hello! How are you doing today? How can"}],
                    "role": "assistant",
                }
            },
            "stopReason": "max_tokens",
            "usage": {"inputTokens": 5, "outputTokens": 10, "totalTokens": 15},
        }


@pytest.mark.parametrize(
    "model",
    [
        "bedrock/mistral.mistral-large-2407-v1:0",
    ],
)
@pytest.mark.respx
@pytest.mark.asyncio()
async def test_bedrock_max_completion_tokens(model: str, respx_mock: MockRouter):
    """
    Tests that:
    - max_completion_tokens is passed as max_tokens to bedrock models
    """
    litellm.set_verbose = True

    mock_response = return_mocked_response(model)
    _model = model.split("/")[1]
    print("\n\nmock_response: ", mock_response)
    url = f"https://bedrock-runtime.us-west-2.amazonaws.com/model/{_model}/converse"
    mock_request = respx_mock.post(url).mock(
        return_value=httpx.Response(200, json=mock_response)
    )

    response = await litellm.acompletion(
        model=model,
        max_completion_tokens=10,
        messages=[{"role": "user", "content": "Hello!"}],
    )

    assert mock_request.called
    request_body = json.loads(mock_request.calls[0].request.content)

    print("request_body: ", request_body)

    assert request_body == {
        "messages": [{"role": "user", "content": [{"text": "Hello!"}]}],
        "additionalModelRequestFields": {},
        "system": [],
        "inferenceConfig": {"maxTokens": 10},
    }
    print(f"response: {response}")
    assert isinstance(response, ModelResponse)


@pytest.mark.parametrize(
    "model",
    ["anthropic/claude-3-sonnet-20240229", "anthropic/claude-3-opus-20240229,"],
)
@pytest.mark.respx
@pytest.mark.asyncio()
async def test_anthropic_api_max_completion_tokens(model: str, respx_mock: MockRouter):
    """
    Tests that:
    - max_completion_tokens is passed as max_tokens to anthropic models
    """
    litellm.set_verbose = True

    mock_response = {
        "content": [{"text": "Hi! My name is Claude.", "type": "text"}],
        "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
        "model": "claude-3-5-sonnet-20240620",
        "role": "assistant",
        "stop_reason": "end_turn",
        "stop_sequence": None,
        "type": "message",
        "usage": {"input_tokens": 2095, "output_tokens": 503},
    }

    print("\n\nmock_response: ", mock_response)
    url = f"https://api.anthropic.com/v1/messages"
    mock_request = respx_mock.post(url).mock(
        return_value=httpx.Response(200, json=mock_response)
    )

    response = await litellm.acompletion(
        model=model,
        max_completion_tokens=10,
        messages=[{"role": "user", "content": "Hello!"}],
    )

    assert mock_request.called
    request_body = json.loads(mock_request.calls[0].request.content)

    print("request_body: ", request_body)

    assert request_body == {
        "messages": [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}],
        "max_tokens": 10,
        "model": model.split("/")[-1],
    }
    print(f"response: {response}")
    assert isinstance(response, ModelResponse)


def test_all_model_configs():
    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.ai21.transformation import (
        VertexAIAi21Config,
    )
    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.llama3.transformation import (
        VertexAILlama3Config,
    )

    assert (
        "max_completion_tokens" in VertexAILlama3Config().get_supported_openai_params()
    )
    assert VertexAILlama3Config().map_openai_params(
        {"max_completion_tokens": 10}, {}, "llama3"
    ) == {"max_tokens": 10}

    assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params()
    assert VertexAIAi21Config().map_openai_params(
        {"max_completion_tokens": 10}, {}, "llama3"
    ) == {"max_tokens": 10}

    from litellm.llms.fireworks_ai import FireworksAIConfig

    assert "max_completion_tokens" in FireworksAIConfig().get_supported_openai_params()
    assert FireworksAIConfig().map_openai_params(
        {"max_completion_tokens": 10}, {}, "llama3"
    ) == {"max_tokens": 10}

    from litellm.llms.huggingface_restapi import HuggingfaceConfig

    assert "max_completion_tokens" in HuggingfaceConfig().get_supported_openai_params()
    assert HuggingfaceConfig().map_openai_params({"max_completion_tokens": 10}, {}) == {
        "max_new_tokens": 10
    }

    from litellm.llms.nvidia_nim import NvidiaNimConfig

    assert "max_completion_tokens" in NvidiaNimConfig().get_supported_openai_params(
        model="llama3"
    )
    assert NvidiaNimConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    from litellm.llms.ollama_chat import OllamaChatConfig

    assert "max_completion_tokens" in OllamaChatConfig().get_supported_openai_params()
    assert OllamaChatConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"num_predict": 10}

    from litellm.llms.predibase import PredibaseConfig

    assert "max_completion_tokens" in PredibaseConfig().get_supported_openai_params()
    assert PredibaseConfig().map_openai_params(
        {"max_completion_tokens": 10},
        {},
    ) == {"max_new_tokens": 10}

    from litellm.llms.text_completion_codestral import MistralTextCompletionConfig

    assert (
        "max_completion_tokens"
        in MistralTextCompletionConfig().get_supported_openai_params()
    )
    assert MistralTextCompletionConfig().map_openai_params(
        {"max_completion_tokens": 10},
        {},
    ) == {"max_tokens": 10}

    from litellm.llms.volcengine import VolcEngineConfig

    assert "max_completion_tokens" in VolcEngineConfig().get_supported_openai_params(
        model="llama3"
    )
    assert VolcEngineConfig().map_openai_params(
        model="llama3",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    from litellm.llms.AI21.chat import AI21ChatConfig

    assert "max_completion_tokens" in AI21ChatConfig().get_supported_openai_params(
        "jamba-1.5-mini@001"
    )
    assert AI21ChatConfig().map_openai_params(
        model="jamba-1.5-mini@001",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    from litellm.llms.AzureOpenAI.azure import AzureOpenAIConfig

    assert "max_completion_tokens" in AzureOpenAIConfig().get_supported_openai_params()
    assert AzureOpenAIConfig().map_openai_params(
        model="gpt-3.5-turbo",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        api_version="2022-12-01",
        drop_params=False,
    ) == {"max_tokens": 10}

    from litellm.llms.bedrock.chat import AmazonConverseConfig

    assert (
        "max_completion_tokens"
        in AmazonConverseConfig().get_supported_openai_params(
            model="anthropic.claude-3-sonnet-20240229-v1:0"
        )
    )
    assert AmazonConverseConfig().map_openai_params(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"maxTokens": 10}

    from litellm.llms.text_completion_codestral import MistralTextCompletionConfig

    assert (
        "max_completion_tokens"
        in MistralTextCompletionConfig().get_supported_openai_params()
    )
    assert MistralTextCompletionConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    from litellm.llms.bedrock.common_utils import (
        AmazonAnthropicClaude3Config,
        AmazonAnthropicConfig,
    )

    assert (
        "max_completion_tokens"
        in AmazonAnthropicClaude3Config().get_supported_openai_params()
    )

    assert AmazonAnthropicClaude3Config().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    assert (
        "max_completion_tokens" in AmazonAnthropicConfig().get_supported_openai_params()
    )

    assert AmazonAnthropicConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens_to_sample": 10}

    from litellm.llms.databricks.chat import DatabricksConfig

    assert "max_completion_tokens" in DatabricksConfig().get_supported_openai_params()

    assert DatabricksConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_anthropic import (
        VertexAIAnthropicConfig,
    )

    assert (
        "max_completion_tokens"
        in VertexAIAnthropicConfig().get_supported_openai_params()
    )

    assert VertexAIAnthropicConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_tokens": 10}

    from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
        VertexAIConfig,
        GoogleAIStudioGeminiConfig,
        VertexGeminiConfig,
    )

    assert "max_completion_tokens" in VertexAIConfig().get_supported_openai_params()

    assert VertexAIConfig().map_openai_params(
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_output_tokens": 10}

    assert (
        "max_completion_tokens"
        in GoogleAIStudioGeminiConfig().get_supported_openai_params()
    )

    assert GoogleAIStudioGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
    ) == {"max_output_tokens": 10}

    assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params()

    assert VertexGeminiConfig().map_openai_params(
        model="gemini-1.0-pro",
        non_default_params={"max_completion_tokens": 10},
        optional_params={},
        drop_params=False,
    ) == {"max_output_tokens": 10}
tests/llm_translation/test_openai_o1.py (101 additions, Normal file)
@@ -0,0 +1,101 @@
import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path


import httpx
import pytest
from respx import MockRouter

import litellm
from litellm import Choices, Message, ModelResponse


@pytest.mark.asyncio
@pytest.mark.respx
async def test_o1_handle_system_role(respx_mock: MockRouter):
    """
    Tests that:
    - max_tokens is translated to 'max_completion_tokens'
    - role 'system' is translated to 'user'
    """
    litellm.set_verbose = True

    mock_response = ModelResponse(
        id="cmpl-mock",
        choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
        created=int(datetime.now().timestamp()),
        model="o1-preview",
    )

    mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
        return_value=httpx.Response(200, json=mock_response.dict())
    )

    response = await litellm.acompletion(
        model="o1-preview",
        max_tokens=10,
        messages=[{"role": "system", "content": "Hello!"}],
    )

    assert mock_request.called
    request_body = json.loads(mock_request.calls[0].request.content)

    print("request_body: ", request_body)

    assert request_body == {
        "model": "o1-preview",
        "max_completion_tokens": 10,
        "messages": [{"role": "user", "content": "Hello!"}],
    }

    print(f"response: {response}")
    assert isinstance(response, ModelResponse)


@pytest.mark.asyncio
@pytest.mark.respx
@pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"])
async def test_o1_max_completion_tokens(respx_mock: MockRouter, model: str):
    """
    Tests that:
    - max_completion_tokens is passed directly to OpenAI chat completion models
    """
    litellm.set_verbose = True

    mock_response = ModelResponse(
        id="cmpl-mock",
        choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
        created=int(datetime.now().timestamp()),
        model=model,
    )

    mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
        return_value=httpx.Response(200, json=mock_response.dict())
    )

    response = await litellm.acompletion(
        model=model,
        max_completion_tokens=10,
        messages=[{"role": "user", "content": "Hello!"}],
    )

    assert mock_request.called
    request_body = json.loads(mock_request.calls[0].request.content)

    print("request_body: ", request_body)

    assert request_body == {
        "model": model,
        "max_completion_tokens": 10,
        "messages": [{"role": "user", "content": "Hello!"}],
    }

    print(f"response: {response}")
    assert isinstance(response, ModelResponse)
tests/llm_translation/test_optional_params.py (602 additions, Normal file)
@@ -0,0 +1,602 @@
#### What this tests ####
# This tests if get_optional_params works as expected
import asyncio
import inspect
import os
import sys
import time
import traceback

import pytest

sys.path.insert(0, os.path.abspath("../.."))
from unittest.mock import MagicMock, patch

import litellm
from litellm.llms.prompt_templates.factory import map_system_message_pt
from litellm.types.completion import (
    ChatCompletionMessageParam,
    ChatCompletionSystemMessageParam,
    ChatCompletionUserMessageParam,
)
from litellm.utils import (
    get_optional_params,
    get_optional_params_embeddings,
    get_optional_params_image_gen,
)

## get_optional_params_embeddings
### Models: OpenAI, Azure, Bedrock
### Scenarios: w/ optional params + litellm.drop_params = True


def test_supports_system_message():
    """
    Check if litellm.completion(...,supports_system_message=False)
    """
    messages = [
        ChatCompletionSystemMessageParam(role="system", content="Listen here!"),
        ChatCompletionUserMessageParam(role="user", content="Hello there!"),
    ]

    new_messages = map_system_message_pt(messages=messages)

    assert len(new_messages) == 1
    assert new_messages[0]["role"] == "user"

    ## confirm you can make a openai call with this param

    response = litellm.completion(
        model="gpt-3.5-turbo", messages=new_messages, supports_system_message=False
    )

    assert isinstance(response, litellm.ModelResponse)


@pytest.mark.parametrize(
    "stop_sequence, expected_count", [("\n", 0), (["\n"], 0), (["finish_reason"], 1)]
)
def test_anthropic_optional_params(stop_sequence, expected_count):
    """
    Test if whitespace character optional param is dropped by anthropic
    """
    litellm.drop_params = True
    optional_params = get_optional_params(
        model="claude-3", custom_llm_provider="anthropic", stop=stop_sequence
    )
    assert len(optional_params) == expected_count


def test_bedrock_optional_params_embeddings():
    litellm.drop_params = True
    optional_params = get_optional_params_embeddings(
        model="", user="John", encoding_format=None, custom_llm_provider="bedrock"
    )
    assert len(optional_params) == 0


@pytest.mark.parametrize(
    "model, expected_dimensions, dimensions_kwarg",
    [
        ("bedrock/amazon.titan-embed-text-v1", False, None),
        ("bedrock/amazon.titan-embed-image-v1", True, "embeddingConfig"),
        ("bedrock/amazon.titan-embed-text-v2:0", True, "dimensions"),
        ("bedrock/cohere.embed-multilingual-v3", False, None),
    ],
)
def test_bedrock_optional_params_embeddings_dimension(
    model, expected_dimensions, dimensions_kwarg
):
    litellm.drop_params = True
    optional_params = get_optional_params_embeddings(
        model=model,
        user="John",
        encoding_format=None,
        dimensions=20,
        custom_llm_provider="bedrock",
    )
    if expected_dimensions:
        assert len(optional_params) == 1
    else:
        assert len(optional_params) == 0

    if dimensions_kwarg is not None:
        assert dimensions_kwarg in optional_params


def test_google_ai_studio_optional_params_embeddings():
    optional_params = get_optional_params_embeddings(
        model="",
        user="John",
        encoding_format=None,
        custom_llm_provider="gemini",
        drop_params=True,
    )
    assert len(optional_params) == 0


def test_openai_optional_params_embeddings():
    litellm.drop_params = True
    optional_params = get_optional_params_embeddings(
        model="", user="John", encoding_format=None, custom_llm_provider="openai"
    )
    assert len(optional_params) == 1
    assert optional_params["user"] == "John"


def test_azure_optional_params_embeddings():
    litellm.drop_params = True
    optional_params = get_optional_params_embeddings(
        model="chatgpt-v-2",
        user="John",
        encoding_format=None,
        custom_llm_provider="azure",
    )
    assert len(optional_params) == 1
    assert optional_params["user"] == "John"


def test_databricks_optional_params():
    litellm.drop_params = True
    optional_params = get_optional_params(
        model="",
        user="John",
        custom_llm_provider="databricks",
        max_tokens=10,
        temperature=0.2,
    )
    print(f"optional_params: {optional_params}")
    assert len(optional_params) == 2
    assert "user" not in optional_params


def test_gemini_optional_params():
    litellm.drop_params = True
    optional_params = get_optional_params(
        model="",
        custom_llm_provider="gemini",
        max_tokens=10,
        frequency_penalty=10,
    )
    print(f"optional_params: {optional_params}")
    assert len(optional_params) == 1
    assert "frequency_penalty" not in optional_params


def test_azure_ai_mistral_optional_params():
    litellm.drop_params = True
    optional_params = get_optional_params(
        model="mistral-large-latest",
        user="John",
        custom_llm_provider="openai",
        max_tokens=10,
        temperature=0.2,
    )
    assert "user" not in optional_params


def test_vertex_ai_llama_3_optional_params():
    litellm.vertex_llama3_models = ["meta/llama3-405b-instruct-maas"]
    litellm.drop_params = True
    optional_params = get_optional_params(
        model="meta/llama3-405b-instruct-maas",
        user="John",
        custom_llm_provider="vertex_ai",
        max_tokens=10,
        temperature=0.2,
    )
    assert "user" not in optional_params


def test_vertex_ai_mistral_optional_params():
    litellm.vertex_mistral_models = ["mistral-large@2407"]
    litellm.drop_params = True
    optional_params = get_optional_params(
        model="mistral-large@2407",
        user="John",
        custom_llm_provider="vertex_ai",
        max_tokens=10,
        temperature=0.2,
    )
    assert "user" not in optional_params
    assert "max_tokens" in optional_params
    assert "temperature" in optional_params


def test_azure_gpt_optional_params_gpt_vision():
    # for OpenAI, Azure all extra params need to get passed as extra_body to OpenAI python. We assert we actually set extra_body here
    optional_params = litellm.utils.get_optional_params(
        model="",
        user="John",
        custom_llm_provider="azure",
        max_tokens=10,
        temperature=0.2,
        enhancements={"ocr": {"enabled": True}, "grounding": {"enabled": True}},
        dataSources=[
            {
                "type": "AzureComputerVision",
                "parameters": {
                    "endpoint": "<your_computer_vision_endpoint>",
                    "key": "<your_computer_vision_key>",
                },
            }
        ],
    )

    print(optional_params)
    assert optional_params["max_tokens"] == 10
    assert optional_params["temperature"] == 0.2
    assert optional_params["extra_body"] == {
        "enhancements": {"ocr": {"enabled": True}, "grounding": {"enabled": True}},
        "dataSources": [
            {
                "type": "AzureComputerVision",
                "parameters": {
                    "endpoint": "<your_computer_vision_endpoint>",
                    "key": "<your_computer_vision_key>",
                },
            }
        ],
    }


# test_azure_gpt_optional_params_gpt_vision()


def test_azure_gpt_optional_params_gpt_vision_with_extra_body():
    # if user passes extra_body, we should not over write it, we should pass it along to OpenAI python
    optional_params = litellm.utils.get_optional_params(
        model="",
        user="John",
        custom_llm_provider="azure",
        max_tokens=10,
        temperature=0.2,
        extra_body={
            "meta": "hi",
        },
        enhancements={"ocr": {"enabled": True}, "grounding": {"enabled": True}},
        dataSources=[
            {
                "type": "AzureComputerVision",
                "parameters": {
                    "endpoint": "<your_computer_vision_endpoint>",
                    "key": "<your_computer_vision_key>",
                },
            }
        ],
    )

    print(optional_params)
    assert optional_params["max_tokens"] == 10
    assert optional_params["temperature"] == 0.2
    assert optional_params["extra_body"] == {
        "enhancements": {"ocr": {"enabled": True}, "grounding": {"enabled": True}},
        "dataSources": [
            {
                "type": "AzureComputerVision",
                "parameters": {
                    "endpoint": "<your_computer_vision_endpoint>",
                    "key": "<your_computer_vision_key>",
                },
            }
        ],
        "meta": "hi",
    }


# test_azure_gpt_optional_params_gpt_vision_with_extra_body()


def test_openai_extra_headers():
    optional_params = litellm.utils.get_optional_params(
        model="",
        user="John",
        custom_llm_provider="openai",
        max_tokens=10,
        temperature=0.2,
        extra_headers={"AI-Resource Group": "ishaan-resource"},
    )

    print(optional_params)
    assert optional_params["max_tokens"] == 10
    assert optional_params["temperature"] == 0.2
    assert optional_params["extra_headers"] == {"AI-Resource Group": "ishaan-resource"}


@pytest.mark.parametrize(
    "api_version",
    [
        "2024-02-01",
        "2024-07-01",  # potential future version with tool_choice="required" supported
        "2023-07-01-preview",
        "2024-03-01-preview",
    ],
)
def test_azure_tool_choice(api_version):
    """
    Test azure tool choice on older + new version
    """
    litellm.drop_params = True
    optional_params = litellm.utils.get_optional_params(
        model="chatgpt-v-2",
        user="John",
        custom_llm_provider="azure",
        max_tokens=10,
        temperature=0.2,
        extra_headers={"AI-Resource Group": "ishaan-resource"},
        tool_choice="required",
        api_version=api_version,
    )

    print(f"{optional_params}")
    if api_version == "2024-07-01":
        assert optional_params["tool_choice"] == "required"
    else:
        assert (
            "tool_choice" not in optional_params
        ), "tool choice should not be present. Got - tool_choice={} for api version={}".format(
            optional_params["tool_choice"], api_version
        )


@pytest.mark.parametrize("drop_params", [True, False, None])
def test_dynamic_drop_params(drop_params):
    """
    Make a call to cohere w/ drop params = True vs. false.
    """
    if drop_params is True:
        optional_params = litellm.utils.get_optional_params(
            model="command-r",
            custom_llm_provider="cohere",
            response_format={"type": "json"},
            drop_params=drop_params,
        )
    else:
        try:
            optional_params = litellm.utils.get_optional_params(
                model="command-r",
                custom_llm_provider="cohere",
                response_format={"type": "json"},
                drop_params=drop_params,
            )
            pytest.fail("Expected to fail")
        except Exception as e:
            pass


def test_dynamic_drop_params_e2e():
    with patch("requests.post", new=MagicMock()) as mock_response:
        try:
            response = litellm.completion(
                model="command-r",
                messages=[{"role": "user", "content": "Hey, how's it going?"}],
                response_format={"key": "value"},
                drop_params=True,
            )
        except Exception as e:
            pass

        mock_response.assert_called_once()
        print(mock_response.call_args.kwargs["data"])
        assert "response_format" not in mock_response.call_args.kwargs["data"]


@pytest.mark.parametrize(
    "model, provider, should_drop",
    [("command-r", "cohere", True), ("gpt-3.5-turbo", "openai", False)],
)
def test_drop_params_parallel_tool_calls(model, provider, should_drop):
    """
    https://github.com/BerriAI/litellm/issues/4584
    """
    response = litellm.utils.get_optional_params(
        model=model,
        custom_llm_provider=provider,
        response_format={"type": "json"},
        parallel_tool_calls=True,
        drop_params=True,
    )

    print(response)

    if should_drop:
        assert "response_format" not in response
        assert "parallel_tool_calls" not in response
    else:
        assert "response_format" in response
        assert "parallel_tool_calls" in response


def test_dynamic_drop_params_parallel_tool_calls():
    """
    https://github.com/BerriAI/litellm/issues/4584
    """
    with patch("requests.post", new=MagicMock()) as mock_response:
        try:
            response = litellm.completion(
                model="command-r",
                messages=[{"role": "user", "content": "Hey, how's it going?"}],
                parallel_tool_calls=True,
                drop_params=True,
            )
        except Exception as e:
            pass

        mock_response.assert_called_once()
        print(mock_response.call_args.kwargs["data"])
        assert "parallel_tool_calls" not in mock_response.call_args.kwargs["data"]


@pytest.mark.parametrize("drop_params", [True, False, None])
def test_dynamic_drop_additional_params(drop_params):
    """
    Make a call to cohere, dropping 'response_format' specifically
    """
    if drop_params is True:
        optional_params = litellm.utils.get_optional_params(
            model="command-r",
            custom_llm_provider="cohere",
            response_format={"type": "json"},
            additional_drop_params=["response_format"],
        )
    else:
        try:
            optional_params = litellm.utils.get_optional_params(
                model="command-r",
                custom_llm_provider="cohere",
                response_format={"type": "json"},
            )
            pytest.fail("Expected to fail")
        except Exception as e:
            pass


def test_dynamic_drop_additional_params_e2e():
    with patch("requests.post", new=MagicMock()) as mock_response:
        try:
            response = litellm.completion(
                model="command-r",
                messages=[{"role": "user", "content": "Hey, how's it going?"}],
                response_format={"key": "value"},
                additional_drop_params=["response_format"],
            )
        except Exception as e:
            pass

        mock_response.assert_called_once()
        print(mock_response.call_args.kwargs["data"])
        assert "response_format" not in mock_response.call_args.kwargs["data"]
        assert "additional_drop_params" not in mock_response.call_args.kwargs["data"]


def test_get_optional_params_image_gen():
    response = litellm.utils.get_optional_params_image_gen(
        aws_region_name="us-east-1", custom_llm_provider="openai"
    )

    print(response)

    assert "aws_region_name" not in response
    response = litellm.utils.get_optional_params_image_gen(
        aws_region_name="us-east-1", custom_llm_provider="bedrock"
    )

    print(response)

    assert "aws_region_name" in response


def test_bedrock_optional_params_embeddings_provider_specific_params():
    optional_params = get_optional_params_embeddings(
        model="my-custom-model",
        custom_llm_provider="huggingface",
        wait_for_model=True,
    )
    assert len(optional_params) == 1


def test_get_optional_params_num_retries():
    """
    Relevant issue - https://github.com/BerriAI/litellm/issues/5124
    """
    with patch("litellm.main.get_optional_params", new=MagicMock()) as mock_client:
        _ = litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            num_retries=10,
        )

        mock_client.assert_called()

        print(f"mock_client.call_args: {mock_client.call_args}")
        assert mock_client.call_args.kwargs["max_retries"] == 10


@pytest.mark.parametrize(
    "provider",
    [
        "vertex_ai",
        "vertex_ai_beta",
    ],
)
def test_vertex_safety_settings(provider):
    litellm.vertex_ai_safety_settings = [
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_NONE",
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_NONE",
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_NONE",
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_NONE",
        },
    ]

    optional_params = get_optional_params(
        model="gemini-1.5-pro", custom_llm_provider=provider
    )
    assert len(optional_params) == 1


@pytest.mark.parametrize(
    "model, provider, expectedAddProp",
    [("gemini-1.5-pro", "vertex_ai_beta", False), ("gpt-3.5-turbo", "openai", True)],
)
def test_parse_additional_properties_json_schema(model, provider, expectedAddProp):
    optional_params = get_optional_params(
        model=model,
        custom_llm_provider=provider,
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "math_reasoning",
                "schema": {
                    "type": "object",
                    "properties": {
                        "steps": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "explanation": {"type": "string"},
                                    "output": {"type": "string"},
                                },
                                "required": ["explanation", "output"],
                                "additionalProperties": False,
                            },
                        },
                        "final_answer": {"type": "string"},
                    },
                    "required": ["steps", "final_answer"],
                    "additionalProperties": False,
                },
                "strict": True,
            },
        },
    )

    print(optional_params)

    if provider == "vertex_ai_beta":
        schema = optional_params["response_schema"]
    elif provider == "openai":
        schema = optional_params["response_format"]["json_schema"]["schema"]
    assert ("additionalProperties" in schema) == expectedAddProp


def test_o1_model_params():
    optional_params = get_optional_params(
        model="o1-preview-2024-09-12",
        custom_llm_provider="openai",
        seed=10,
        user="John",
    )
    assert optional_params["seed"] == 10
    assert optional_params["user"] == "John"