Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
fix(router.py): add more deployment timeout debug information for timeout errors (#8523)
* fix(router.py): add more deployment timeout debug information for timeout errors, to help understand why some calls in high traffic don't respect their model-specific timeouts
* test(test_convert_dict_to_response.py): add a unit test ensuring an empty str is not converted to None. Addresses https://github.com/BerriAI/litellm/issues/8507
* fix(convert_dict_to_response.py): handle an empty message str - don't return it back as 'None'. Fixes https://github.com/BerriAI/litellm/issues/8507
* test(test_completion.py): add e2e test
This commit is contained in:
parent b70981b8fb
commit c399232b2c
5 changed files with 234 additions and 60 deletions
@@ -1,10 +1,10 @@
 import asyncio
 import json
+import re
 import time
 import traceback
 import uuid
-import re
-from typing import Dict, Iterable, List, Literal, Optional, Union, Tuple
+from typing import Dict, Iterable, List, Literal, Optional, Tuple, Union

 import litellm
 from litellm._logging import verbose_logger
@@ -221,9 +221,19 @@ def _handle_invalid_parallel_tool_calls(
         # if there is a JSONDecodeError, return the original tool_calls
         return tool_calls


-def _parse_content_for_reasoning(message_text: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
+def _parse_content_for_reasoning(
+    message_text: Optional[str],
+) -> Tuple[Optional[str], Optional[str]]:
+    """
+    Parse the content for reasoning
+
+    Returns:
+    - reasoning_content: The content of the reasoning
+    - content: The content of the message
+    """
     if not message_text:
-        return None, None
+        return None, message_text

     reasoning_match = re.match(r"<think>(.*?)</think>(.*)", message_text, re.DOTALL)

@@ -232,6 +242,7 @@ def _parse_content_for_reasoning(
     return None, message_text


 class LiteLLMResponseObjectHandler:

     @staticmethod
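For context, a quick sketch (not part of the diff) of how the reworked helper behaves. It imports the private helper from the same module the test file below pulls convert_to_model_response_object from, so it is illustration only:

from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
    _parse_content_for_reasoning,
)

# <think>...</think> blocks are split out as reasoning_content; the remainder is the content
assert _parse_content_for_reasoning("<think>weigh both options</think>Answer: 42") == (
    "weigh both options",
    "Answer: 42",
)

# plain messages pass through unchanged
assert _parse_content_for_reasoning("hello") == (None, "hello")

# the fix: an empty string is now returned as-is instead of being collapsed to None
assert _parse_content_for_reasoning("") == (None, "")
assert _parse_content_for_reasoning(None) == (None, None)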
@@ -445,9 +456,15 @@ def convert_to_model_response_object(  # noqa: PLR0915
                     provider_specific_fields[field] = choice["message"][field]

             # Handle reasoning models that display `reasoning_content` within `content`
-            reasoning_content, content = _parse_content_for_reasoning(choice["message"].get("content", None))
+            reasoning_content, content = _parse_content_for_reasoning(
+                choice["message"].get("content")
+            )
+
             if reasoning_content:
-                provider_specific_fields["reasoning_content"] = reasoning_content
+                provider_specific_fields["reasoning_content"] = (
+                    reasoning_content
+                )

             message = Message(
                 content=content,
@@ -1,51 +1,10 @@
 model_list:
-  - model_name: gpt-3.5-turbo-testing
-    litellm_params:
-      model: gpt-3.5-turbo
-  - model_name: gpt-4
-    litellm_params:
-      model: gpt-3.5-turbo
-  - model_name: fake-openai-endpoint
-    litellm_params:
-      model: openai/fake
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
   - model_name: azure-gpt-35-turbo
     litellm_params:
       model: azure/chatgpt-v-2
       api_key: os.environ/AZURE_API_KEY
       api_base: os.environ/AZURE_API_BASE
       timeout: 0.000000001
-  - model_name: o3-mini
-    litellm_params:
-      model: o3-mini
-      rpm: 3
-  - model_name: anthropic-claude
-    litellm_params:
-      model: claude-3-5-haiku-20241022
-      timeout: 0.000000001
-  - model_name: groq/*
-    litellm_params:
-      model: groq/*
-      api_key: os.environ/GROQ_API_KEY
-      mock_response: Hi!
-  - model_name: deepseek/*
-    litellm_params:
-      model: deepseek/*
-      api_key: os.environ/DEEPSEEK_API_KEY
-  - model_name: fake-openai-endpoint
-    litellm_params:
-      model: openai/fake
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-  - model_name: vertex_ai/gemini-*
-    litellm_params:
-      model: vertex_ai/gemini-*
-  - model_name: fake-azure-endpoint
-    litellm_params:
-      model: openai/429
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app

 litellm_settings:
-  callbacks: ["prometheus"]
+  request_timeout: 10000
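The trimmed config keeps a single Azure deployment whose litellm_params timeout is deliberately tiny, which is exactly the scenario the router change below is meant to make debuggable. A rough programmatic equivalent, as a sketch only (the env-var handling here is illustrative, not from the commit):

import os

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "azure-gpt-35-turbo",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_key": os.environ.get("AZURE_API_KEY"),
                "api_base": os.environ.get("AZURE_API_BASE"),
                # absurdly small per-deployment timeout: every call should raise litellm.Timeout
                "timeout": 0.000000001,
            },
        }
    ]
)

The proxy-wide request_timeout: 10000 under litellm_settings is a separate, much larger ceiling; the per-deployment timeout set here is what the new Timeout debug message reports, alongside request_timeout if it is set on the deployment.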
@@ -573,20 +573,32 @@ class Router:
                 litellm.amoderation, call_type="moderation"
             )


     def discard(self):
         """
         Pseudo-destructor to be invoked to clean up global data structures when router is no longer used.
         For now, unhook router's callbacks from all lists
         """
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm._async_success_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm.success_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm._async_failure_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm.failure_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm.input_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm.service_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm.callbacks, self)
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm._async_success_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm.success_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm._async_failure_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm.failure_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm.input_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm.service_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm.callbacks, self
+        )

     def _update_redis_cache(self, cache: RedisCache):
         """
@@ -602,7 +614,6 @@ class Router:
         if self.cache.redis_cache is None:
             self.cache.redis_cache = cache

-
     def initialize_assistants_endpoint(self):
         ## INITIALIZE PASS THROUGH ASSISTANTS ENDPOINT ##
         self.acreate_assistants = self.factory_function(litellm.acreate_assistants)
@@ -902,6 +913,9 @@ class Router:
         - in the semaphore, make a check against it's local rpm before running
         """
         model_name = None
+        _timeout_debug_deployment_dict = (
+            {}
+        )  # this is a temporary dict to debug timeout issues
         try:
             verbose_router_logger.debug(
                 f"Inside _acompletion()- model: {model}; kwargs: {kwargs}"
@@ -914,6 +928,7 @@ class Router:
                 specific_deployment=kwargs.pop("specific_deployment", None),
                 request_kwargs=kwargs,
             )
+            _timeout_debug_deployment_dict = deployment
             end_time = time.time()
             _duration = end_time - start_time
             asyncio.create_task(
@@ -1009,6 +1024,15 @@ class Router:
             )

             return response
+        except litellm.Timeout as e:
+            deployment_request_timeout_param = _timeout_debug_deployment_dict.get(
+                "litellm_params", {}
+            ).get("request_timeout", None)
+            deployment_timeout_param = _timeout_debug_deployment_dict.get(
+                "litellm_params", {}
+            ).get("timeout", None)
+            e.message += f"\n\nDeployment Info: request_timeout: {deployment_request_timeout_param}\ntimeout: {deployment_timeout_param}"
+            raise e
         except Exception as e:
             verbose_router_logger.info(
                 f"litellm.acompletion(model={model_name})\033[31m Exception {str(e)}\033[0m"
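Taken together, the three router hunks stash the deployment picked for the call in _timeout_debug_deployment_dict and, when litellm.Timeout is raised, append that deployment's request_timeout/timeout litellm_params to the exception message before re-raising. A caller-side sketch of what that surfaces (the deployment below is illustrative, mirrors the old test config, and assumes ANTHROPIC_API_KEY is set):

import asyncio
import os

import litellm
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "anthropic-claude",
            "litellm_params": {
                "model": "claude-3-5-haiku-20241022",
                "api_key": os.environ.get("ANTHROPIC_API_KEY"),
                # tiny timeout so the call always times out
                "timeout": 0.000000001,
            },
        }
    ]
)


async def main() -> None:
    try:
        await router.acompletion(
            model="anthropic-claude",
            messages=[{"role": "user", "content": "hi"}],
        )
    except litellm.Timeout as e:
        # With this change the message now also carries the deployment's timeout settings,
        # e.g. "Deployment Info: request_timeout: None\ntimeout: 1e-09"
        print(e.message)


asyncio.run(main())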
@@ -3307,6 +3331,7 @@ class Router:
         _num_healthy_deployments = 0
         if healthy_deployments is not None and isinstance(healthy_deployments, list):
             _num_healthy_deployments = len(healthy_deployments)

         _num_all_deployments = 0
         if all_deployments is not None and isinstance(all_deployments, list):
             _num_all_deployments = len(all_deployments)

@@ -17,6 +17,7 @@ from litellm.types.utils import (
     Choices,
     PromptTokensDetailsWrapper,
     CompletionTokensDetailsWrapper,
+    Usage,
 )

 from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
@@ -750,3 +751,107 @@ def test_image_generation_openai_with_pydantic_warning(caplog):
         assert isinstance(resp.data[0], ImageObject)
     except Exception as e:
         pytest.fail(f"Test failed with exception: {e}")
+
+
+def test_convert_to_model_response_object_with_empty_str():
+    """Test that convert_to_model_response_object handles empty strings correctly."""
+
+    args = {
+        "response_object": {
+            "id": "chatcmpl-B0b1BmxhH4iSoRvFVbBJdLbMwr346",
+            "choices": [
+                {
+                    "finish_reason": "stop",
+                    "index": 0,
+                    "logprobs": None,
+                    "message": {
+                        "content": "",
+                        "refusal": None,
+                        "role": "assistant",
+                        "audio": None,
+                        "function_call": None,
+                        "tool_calls": None,
+                    },
+                }
+            ],
+            "created": 1739481997,
+            "model": "gpt-4o-mini-2024-07-18",
+            "object": "chat.completion",
+            "service_tier": "default",
+            "system_fingerprint": "fp_bd83329f63",
+            "usage": {
+                "completion_tokens": 1,
+                "prompt_tokens": 121,
+                "total_tokens": 122,
+                "completion_tokens_details": {
+                    "accepted_prediction_tokens": 0,
+                    "audio_tokens": 0,
+                    "reasoning_tokens": 0,
+                    "rejected_prediction_tokens": 0,
+                },
+                "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0},
+            },
+        },
+        "model_response_object": ModelResponse(
+            id="chatcmpl-9f9e5ad2-d570-46fe-a5e0-4983e9774318",
+            created=1739481997,
+            model=None,
+            object="chat.completion",
+            system_fingerprint=None,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(
+                        content=None,
+                        role="assistant",
+                        tool_calls=None,
+                        function_call=None,
+                        provider_specific_fields=None,
+                    ),
+                )
+            ],
+            usage=Usage(
+                completion_tokens=0,
+                prompt_tokens=0,
+                total_tokens=0,
+                completion_tokens_details=None,
+                prompt_tokens_details=None,
+            ),
+        ),
+        "response_type": "completion",
+        "stream": False,
+        "start_time": None,
+        "end_time": None,
+        "hidden_params": None,
+        "_response_headers": {
+            "date": "Thu, 13 Feb 2025 21:26:37 GMT",
+            "content-type": "application/json",
+            "transfer-encoding": "chunked",
+            "connection": "keep-alive",
+            "access-control-expose-headers": "X-Request-ID",
+            "openai-organization": "reliablekeystest",
+            "openai-processing-ms": "297",
+            "openai-version": "2020-10-01",
+            "x-ratelimit-limit-requests": "30000",
+            "x-ratelimit-limit-tokens": "150000000",
+            "x-ratelimit-remaining-requests": "29999",
+            "x-ratelimit-remaining-tokens": "149999846",
+            "x-ratelimit-reset-requests": "2ms",
+            "x-ratelimit-reset-tokens": "0s",
+            "x-request-id": "req_651030cbda2c80353086eba8fd0a54ec",
+            "strict-transport-security": "max-age=31536000; includeSubDomains; preload",
+            "cf-cache-status": "DYNAMIC",
+            "set-cookie": "__cf_bm=0ihEMDdqKfEr0I8iP4XZ7C6xEA5rJeAc11XFXNxZgyE-1739481997-1.0.1.1-v5jbjAWhMUZ0faO8q2izQljUQC.R85Vexb18A2MCyS895bur5eRxcguP0.WGY6EkxXSaOKN55VL3Pg3NOdq_xA; path=/; expires=Thu, 13-Feb-25 21:56:37 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=jrNMSOBRrxUnGgJ62BltpZZSNImfnEqPX9Uu8meGFLY-1739481997919-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
+            "x-content-type-options": "nosniff",
+            "server": "cloudflare",
+            "cf-ray": "9117e5d4caa1f7b5-LAX",
+            "content-encoding": "gzip",
+            "alt-svc": 'h3=":443"; ma=86400',
+        },
+        "convert_tool_call_to_json_mode": None,
+    }
+
+    resp: ModelResponse = convert_to_model_response_object(**args)
+    assert resp is not None
+    assert resp.choices[0].message.content is not None
@@ -4665,3 +4665,71 @@ def test_completion_o3_mini_temperature():
         assert resp.choices[0].message.content is not None
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
+
+
+def test_completion_gpt_4o_empty_str():
+    litellm._turn_on_debug()
+    from openai import OpenAI
+    from unittest.mock import MagicMock
+
+    client = OpenAI()
+
+    # Create response object matching OpenAI's format
+    mock_response_data = {
+        "id": "chatcmpl-B0W3vmiM78Xkgx7kI7dr7PC949DMS",
+        "choices": [
+            {
+                "finish_reason": "stop",
+                "index": 0,
+                "logprobs": None,
+                "message": {
+                    "content": "",
+                    "refusal": None,
+                    "role": "assistant",
+                    "audio": None,
+                    "function_call": None,
+                    "tool_calls": None,
+                },
+            }
+        ],
+        "created": 1739462947,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion",
+        "service_tier": "default",
+        "system_fingerprint": "fp_bd83329f63",
+        "usage": {
+            "completion_tokens": 1,
+            "prompt_tokens": 121,
+            "total_tokens": 122,
+            "completion_tokens_details": {
+                "accepted_prediction_tokens": 0,
+                "audio_tokens": 0,
+                "reasoning_tokens": 0,
+                "rejected_prediction_tokens": 0,
+            },
+            "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0},
+        },
+    }
+
+    # Create a mock response object
+    mock_raw_response = MagicMock()
+    mock_raw_response.headers = {
+        "x-request-id": "123",
+        "openai-organization": "org-123",
+        "x-ratelimit-limit-requests": "100",
+        "x-ratelimit-remaining-requests": "99",
+    }
+    mock_raw_response.parse.return_value = mock_response_data
+
+    # Set up the mock completion
+    mock_completion = MagicMock()
+    mock_completion.return_value = mock_raw_response
+
+    with patch.object(
+        client.chat.completions.with_raw_response, "create", mock_completion
+    ) as mock_create:
+        resp = litellm.completion(
+            model="gpt-4o-mini",
+            messages=[{"role": "user", "content": ""}],
+        )
+        assert resp.choices[0].message.content is not None