Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00
o1 - add image param handling (#7312)
* fix(openai.py): fix returning o1 non-streaming requests; fixes the issue where fake stream was always true for o1
* build(model_prices_and_context_window.json): add 'supports_vision' for o1 models
* fix: add internal server error exception mapping
* fix(base_llm_unit_tests.py): drop temperature from test
* test: mark prompt caching as a flaky test
Parent: a101c1fff4
Commit: 62b00cf28d

9 changed files with 68 additions and 79 deletions
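Taken together, the changes below let o1 requests carry image content: the o1 entries gain `supports_vision: true` and the o1 message transformation no longer rejects `image_url` parts. A minimal usage sketch, assuming a configured `OPENAI_API_KEY`; the image URL is a placeholder, not from the commit:

```python
import litellm

response = litellm.completion(
    model="o1",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this image?"},
                # image_url parts are no longer stripped or rejected for o1
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            ],
        }
    ],
)
print(response.choices[0].message.content)
```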
@@ -290,7 +290,10 @@ def exception_type(  # type: ignore  # noqa: PLR0915
                     response=getattr(original_exception, "response", None),
                     litellm_debug_info=extra_information,
                 )
-            elif "Web server is returning an unknown error" in error_str:
+            elif (
+                "Web server is returning an unknown error" in error_str
+                or "The server had an error processing your request." in error_str
+            ):
                 exception_mapping_worked = True
                 raise litellm.InternalServerError(
                     message=f"{exception_provider} - {message}",
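With this mapping, callers can catch these responses as `litellm.InternalServerError` instead of a generic API error. A hedged sketch of downstream handling; the retry count and model are illustrative only:

```python
import litellm


def call_with_retry(messages, retries=3):
    """Illustrative retry loop around the exception type this hunk maps to."""
    for attempt in range(retries):
        try:
            return litellm.completion(model="o1", messages=messages)
        except litellm.InternalServerError as err:
            if attempt == retries - 1:
                raise
            print(f"internal server error, retrying ({attempt + 1}/{retries}): {err}")
```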
@@ -83,7 +83,10 @@ class BaseConfig(ABC):
         }

     def should_fake_stream(
-        self, model: str, custom_llm_provider: Optional[str] = None
+        self,
+        model: str,
+        stream: Optional[bool],
+        custom_llm_provider: Optional[str] = None,
     ) -> bool:
         """
         Returns True if the model/provider should fake stream
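The widened signature threads the caller's `stream` flag into the decision. A minimal sketch of a provider config overriding the new contract; the class and model name are hypothetical, not part of this diff:

```python
from typing import Optional


class MyProviderConfig:  # hypothetical provider config, for illustration only
    def should_fake_stream(
        self,
        model: str,
        stream: Optional[bool],
        custom_llm_provider: Optional[str] = None,
    ) -> bool:
        # Never fake a stream the caller did not request.
        if stream is not True:
            return False
        # Fake streaming only for models without native streaming support.
        return "my-non-streaming-model" in model
```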
@@ -37,8 +37,13 @@ class OpenAIO1Config(OpenAIGPTConfig):
         return super().get_config()

     def should_fake_stream(
-        self, model: str, custom_llm_provider: Optional[str] = None
+        self,
+        model: str,
+        stream: Optional[bool],
+        custom_llm_provider: Optional[str] = None,
     ) -> bool:
+        if stream is not True:
+            return False
         supported_stream_models = ["o1-mini", "o1-preview"]
         for supported_model in supported_stream_models:
             if supported_model in model:
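A sketch of the dispatcher-side decision under the new signature, using the same lookups referenced elsewhere in this diff (`ProviderConfigManager`, `LlmProviders`); exact return values beyond the early exit are not shown in the hunk:

```python
from litellm.types.utils import LlmProviders
from litellm.utils import ProviderConfigManager

config = ProviderConfigManager.get_provider_chat_config(
    model="o1", provider=LlmProviders("openai")
)
# stream not requested: faking is skipped regardless of model
print(config.should_fake_stream(model="o1", stream=False, custom_llm_provider="openai"))
# stream requested: the o1 config then checks whether the model streams natively
print(config.should_fake_stream(model="o1-mini", stream=True, custom_llm_provider="openai"))
```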
@@ -142,17 +147,4 @@ class OpenAIO1Config(OpenAIGPTConfig):
                 )
                 messages[i] = new_message  # Replace the old message with the new one

-            if "content" in message and isinstance(message["content"], list):
-                new_content = []
-                for content_item in message["content"]:
-                    if content_item.get("type") == "image_url":
-                        if litellm.drop_params is not True:
-                            raise ValueError(
-                                "Image content is not supported for O-1 models. Set litellm.drop_param to True to drop image content."
-                            )
-                        # If drop_param is True, we simply don't add the image content to new_content
-                    else:
-                        new_content.append(content_item)
-                message["content"] = new_content
-
         return messages
@@ -453,18 +453,18 @@ class OpenAIChatCompletion(BaseLLM):
         super().completion()
         try:
             fake_stream: bool = False
-            if custom_llm_provider is not None and model is not None:
-                provider_config = ProviderConfigManager.get_provider_chat_config(
-                    model=model, provider=LlmProviders(custom_llm_provider)
-                )
-                fake_stream = provider_config.should_fake_stream(
-                    model=model, custom_llm_provider=custom_llm_provider
-                )
             inference_params = optional_params.copy()
             stream_options: Optional[dict] = inference_params.pop(
                 "stream_options", None
             )
             stream: Optional[bool] = inference_params.pop("stream", False)
+            if custom_llm_provider is not None and model is not None:
+                provider_config = ProviderConfigManager.get_provider_chat_config(
+                    model=model, provider=LlmProviders(custom_llm_provider)
+                )
+                fake_stream = provider_config.should_fake_stream(
+                    model=model, custom_llm_provider=custom_llm_provider, stream=stream
+                )
             if headers:
                 inference_params["extra_headers"] = headers
             if model is None or messages is None:
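This relocation is the substance of the "fake stream always true" fix: previously `should_fake_stream` ran before `stream` was popped from `optional_params`, so it could not see whether the caller actually asked to stream. A minimal sketch of the corrected ordering, with the surrounding objects passed in as parameters for illustration:

```python
from typing import Optional


def resolve_fake_stream(
    provider_config, model: str, custom_llm_provider: str, optional_params: dict
) -> bool:
    """Illustrative only: pop `stream` from the request params first, then let
    the provider config decide whether streaming needs to be faked."""
    inference_params = optional_params.copy()
    stream: Optional[bool] = inference_params.pop("stream", False)
    return provider_config.should_fake_stream(
        model=model, custom_llm_provider=custom_llm_provider, stream=stream
    )
```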
@@ -502,7 +502,6 @@ class OpenAIChatCompletion(BaseLLM):
                 litellm_params=litellm_params,
                 headers=headers or {},
             )
-
             try:
                 max_retries = data.pop("max_retries", 2)
                 if acompletion is True:
@@ -205,7 +205,7 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true,
         "supports_system_messages": true,
         "supports_response_schema": true
@@ -219,7 +219,7 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true
     },
     "o1-mini-2024-09-12": {
@@ -231,7 +231,7 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true
     },
     "o1-preview": {
@@ -243,7 +243,7 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true
     },
     "o1-preview-2024-09-12": {
@@ -255,7 +255,7 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true
     },
     "o1-2024-12-17": {
@@ -269,7 +269,7 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true,
         "supports_system_messages": true,
         "supports_response_schema": true
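With the flag flipped, the capability can be read from the cost map the same way the test added at the bottom of this commit does; `litellm.supports_vision` is the public helper for the same lookup:

```python
import litellm

# Load the local copy of the cost map, as the new test does.
litellm.model_cost = litellm.get_model_cost_map(url="")
print(litellm.model_cost["o1-preview"]["supports_vision"])  # expected: True

# Public helper for the same capability check.
print(litellm.supports_vision(model="o1-preview"))
```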
@@ -205,7 +205,7 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true,
         "supports_system_messages": true,
         "supports_response_schema": true
@@ -219,7 +219,7 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true
     },
     "o1-mini-2024-09-12": {
@@ -231,7 +231,7 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true
     },
     "o1-preview": {
@@ -243,7 +243,7 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true
     },
     "o1-preview-2024-09-12": {
@@ -255,7 +255,7 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true
     },
     "o1-2024-12-17": {
@@ -269,7 +269,7 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": false,
+        "supports_vision": true,
         "supports_prompt_caching": true,
         "supports_system_messages": true,
         "supports_response_schema": true
@@ -140,20 +140,6 @@ class BaseLLMChatTest(ABC):
         )
         assert response is not None

-    def test_multilingual_requests(self):
-        """
-        Tests that the provider can handle multilingual requests and invalid utf-8 sequences
-
-        Context: https://github.com/openai/openai-python/issues/1921
-        """
-        base_completion_call_args = self.get_base_completion_call_args()
-        response = self.completion_function(
-            **base_completion_call_args,
-            messages=[{"role": "user", "content": "你好世界!\ud83e, ö"}],
-        )
-        print("multilingual response: ", response)
-        assert response is not None
-
     @pytest.mark.parametrize(
         "response_format",
         [
@@ -343,6 +329,7 @@ class BaseLLMChatTest(ABC):
         )
         assert response is not None

+    @pytest.mark.flaky(retries=4, delay=1)
     def test_prompt_caching(self):
         litellm.set_verbose = True
         from litellm.utils import supports_prompt_caching
@@ -399,7 +386,6 @@ class BaseLLMChatTest(ABC):
                     ],
                 },
             ],
-            temperature=0.2,
             max_tokens=10,
         )

@@ -280,6 +280,19 @@ class TestOpenAIChatCompletion(BaseLLMChatTest):
         """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
         pass

+    def test_multilingual_requests(self):
+        """
+        Tests that the provider can handle multilingual requests and invalid utf-8 sequences
+
+        Context: https://github.com/openai/openai-python/issues/1921
+        """
+        base_completion_call_args = self.get_base_completion_call_args()
+        response = self.completion_function(
+            **base_completion_call_args,
+            messages=[{"role": "user", "content": "你好世界!\ud83e, ö"}],
+        )
+        assert response is not None
+

 def test_completion_bad_org():
     import litellm
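The payload in the relocated test is worth a note: `\ud83e` is a lone UTF-16 surrogate, which plain UTF-8 encoding rejects. A small self-contained illustration of the failure mode the test (and the linked openai-python issue) guards against:

```python
# "\ud83e" is an unpaired surrogate; encoding it to UTF-8 raises.
payload = "你好世界!\ud83e, ö"
try:
    payload.encode("utf-8")
except UnicodeEncodeError as err:
    print("lone surrogate rejected:", err)

# One common client-side mitigation is to replace unencodable code points.
print(payload.encode("utf-8", errors="replace").decode("utf-8"))
```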
@@ -15,6 +15,7 @@ from respx import MockRouter

 import litellm
 from litellm import Choices, Message, ModelResponse
+from base_llm_unit_tests import BaseLLMChatTest


 @pytest.mark.parametrize("model", ["o1-preview", "o1-mini", "o1"])
@@ -94,34 +95,6 @@ async def test_o1_handle_tool_calling_optional_params(
     assert expected_tool_calling_support == ("tools" in supported_params)


-# @pytest.mark.parametrize(
-#     "model",
-#     ["o1"],  # "o1-preview", "o1-mini",
-# )
-# @pytest.mark.asyncio
-# async def test_o1_handle_streaming_e2e(model):
-#     """
-#     Tests that:
-#     - max_tokens is translated to 'max_completion_tokens'
-#     - role 'system' is translated to 'user'
-#     """
-#     from openai import AsyncOpenAI
-#     from litellm.utils import ProviderConfigManager
-#     from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
-#     from litellm.types.utils import LlmProviders
-
-#     resp = litellm.completion(
-#         model=model,
-#         messages=[{"role": "user", "content": "Hello!"}],
-#         stream=True,
-#     )
-#     assert isinstance(resp, CustomStreamWrapper)
-#     for chunk in resp:
-#         print("chunk: ", chunk)
-
-#     assert True
-
-
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"])
 async def test_o1_max_completion_tokens(model: str):
@@ -177,3 +150,23 @@ def test_litellm_responses():
     print("response: ", response)

     assert isinstance(response.usage.completion_tokens_details, CompletionTokensDetails)
+
+
+class TestOpenAIO1(BaseLLMChatTest):
+    def get_base_completion_call_args(self):
+        return {
+            "model": "o1",
+        }
+
+    def test_tool_call_no_arguments(self, tool_call_no_arguments):
+        """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
+        pass
+
+
+def test_o1_supports_vision():
+    """Test that o1 supports vision"""
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    for k, v in litellm.model_cost.items():
+        if k.startswith("o1") and v.get("litellm_provider") == "openai":
+            assert v.get("supports_vision") is True, f"{k} does not support vision"
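To exercise the new coverage locally, something like the following should work; the test file path is an assumption based on the class and function names above, and the live tests expect an `OPENAI_API_KEY`:

```python
import pytest

# Path below is assumed, not stated in the diff.
pytest.main(
    [
        "-q",
        "-k",
        "test_o1_supports_vision or TestOpenAIO1",
        "tests/llm_translation/test_openai_o1.py",
    ]
)
```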