Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
Litellm dev 01 25 2025 p4 (#8006)
All checks were successful
Read Version from pyproject.toml / read-version (push) Successful in 34s
* feat(main.py): use asyncio.sleep for mock_timeout=True on async request
Adds unit tests to ensure the proxy does not fail if specific OpenAI requests hang (e.g. the recent o1 outage).
* fix(streaming_handler.py): fix deepseek r1 return reasoning content on streaming
Fixes https://github.com/BerriAI/litellm/issues/7942
* Revert "fix(streaming_handler.py): fix deepseek r1 return reasoning content on streaming"
This reverts commit 7a052a64e3.
* fix(deepseek-r-1): return reasoning_content as a top-level param
Ensures compatibility with existing tools that use it.
* fix: fix linting error
This commit is contained in: parent 03eef5a2a0, commit 6bafdbc546
8 changed files with 108 additions and 19 deletions
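A minimal sketch of the new mock-timeout behavior described above (not from the diff; the model name is arbitrary since, per the acompletion change below, the mock path sleeps and raises before any provider call is made):

import asyncio

import litellm


async def main():
    try:
        # mock_timeout=True makes acompletion await asyncio.sleep(timeout)
        # (keeping the event loop free for other tasks) and then raise
        # litellm.Timeout -- no network request is sent.
        await litellm.acompletion(
            model="gpt-3.5-turbo",  # arbitrary; never actually called
            messages=[{"role": "user", "content": "hi"}],
            mock_timeout=True,
            timeout=1.0,
        )
    except litellm.Timeout as e:
        print(f"caught mock timeout: {e}")


asyncio.run(main())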
@@ -471,6 +471,7 @@ class CustomStreamWrapper:
             finish_reason = None
             logprobs = None
             usage = None
+
             if str_line and str_line.choices and len(str_line.choices) > 0:
                 if (
                     str_line.choices[0].delta is not None
@@ -750,6 +751,7 @@ class CustomStreamWrapper:
                 "function_call" in completion_obj
                 and completion_obj["function_call"] is not None
             )
+            or (model_response.choices[0].delta.provider_specific_fields is not None)
             or (
                 "provider_specific_fields" in response_obj
                 and response_obj["provider_specific_fields"] is not None
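The added disjunct matters for deepseek-r1: a streamed chunk whose delta carries only provider_specific_fields (reasoning, no visible content) must not be treated as empty and dropped. A simplified sketch of the check, showing only the branches visible in this hunk (the helper name is hypothetical; the real condition has further branches for content, tool calls, etc.):

def _chunk_has_payload(completion_obj: dict, model_response, response_obj: dict) -> bool:
    # simplified: only the branches shown in the hunk above
    return (
        ("function_call" in completion_obj and completion_obj["function_call"] is not None)
        or (model_response.choices[0].delta.provider_specific_fields is not None)
        or (
            "provider_specific_fields" in response_obj
            and response_obj["provider_specific_fields"] is not None
        )
    )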
@@ -383,6 +383,10 @@ async def acompletion(
         - If `stream` is True, the function returns an async generator that yields completion lines.
     """
     fallbacks = kwargs.get("fallbacks", None)
+    mock_timeout = kwargs.get("mock_timeout", None)
 
+    if mock_timeout is True:
+        await _handle_mock_timeout_async(mock_timeout, timeout, model)
+
     loop = asyncio.get_event_loop()
     custom_llm_provider = kwargs.get("custom_llm_provider", None)
@@ -565,12 +569,7 @@ def _handle_mock_timeout(
     model: str,
 ):
     if mock_timeout is True and timeout is not None:
-        if isinstance(timeout, float):
-            time.sleep(timeout)
-        elif isinstance(timeout, str):
-            time.sleep(float(timeout))
-        elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None:
-            time.sleep(timeout.connect)
+        _sleep_for_timeout(timeout)
         raise litellm.Timeout(
             message="This is a mock timeout error",
             llm_provider="openai",
@@ -578,6 +577,38 @@ def _handle_mock_timeout(
         )
 
 
+async def _handle_mock_timeout_async(
+    mock_timeout: Optional[bool],
+    timeout: Optional[Union[float, str, httpx.Timeout]],
+    model: str,
+):
+    if mock_timeout is True and timeout is not None:
+        await _sleep_for_timeout_async(timeout)
+        raise litellm.Timeout(
+            message="This is a mock timeout error",
+            llm_provider="openai",
+            model=model,
+        )
+
+
+def _sleep_for_timeout(timeout: Union[float, str, httpx.Timeout]):
+    if isinstance(timeout, float):
+        time.sleep(timeout)
+    elif isinstance(timeout, str):
+        time.sleep(float(timeout))
+    elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None:
+        time.sleep(timeout.connect)
+
+
+async def _sleep_for_timeout_async(timeout: Union[float, str, httpx.Timeout]):
+    if isinstance(timeout, float):
+        await asyncio.sleep(timeout)
+    elif isinstance(timeout, str):
+        await asyncio.sleep(float(timeout))
+    elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None:
+        await asyncio.sleep(timeout.connect)
+
+
 def mock_completion(
     model: str,
     messages: List,
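The split into _sleep_for_timeout and _sleep_for_timeout_async is the heart of the fix: time.sleep blocks the event loop, so a mocked hang on the async path would stall every other in-flight request on the proxy, while asyncio.sleep yields control. A self-contained illustration (not from the diff):

import asyncio
import time


async def worker(name: str) -> None:
    await asyncio.sleep(0.1)
    print(f"{name} finished")


async def mock_hang() -> None:
    # time.sleep(1) here would freeze the loop and delay both workers;
    # asyncio.sleep(1) yields, so the workers finish while this "hangs".
    await asyncio.sleep(1)


async def main() -> None:
    await asyncio.gather(mock_hang(), worker("a"), worker("b"))


asyncio.run(main())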
@@ -15,4 +15,4 @@ model_list:
       model: anthropic.claude-3-sonnet-20240229-v1:0
 
 litellm_settings:
   callbacks: ["langsmith"]
@@ -487,6 +487,8 @@ class Message(OpenAIObject):
 
         if provider_specific_fields:  # set if provider_specific_fields is not empty
             self.provider_specific_fields = provider_specific_fields
+            for k, v in provider_specific_fields.items():
+                setattr(self, k, v)
 
     def get(self, key, default=None):
         # Custom .get() method to access attributes with a default value if the attribute doesn't exist
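The setattr loop mirrors each provider-specific key onto the Message object itself, which is what lets existing tools read reasoning_content as a top-level attribute. A small illustration (assumes provider_specific_fields is an __init__ parameter, as the surrounding context suggests; values are hypothetical):

from litellm.types.utils import Message

msg = Message(
    content="Here's a joke...",
    provider_specific_fields={"reasoning_content": "User wants a joke..."},
)
assert msg.reasoning_content == "User wants a joke..."  # new: top-level access
assert msg.provider_specific_fields["reasoning_content"] == msg.reasoning_content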
@@ -522,18 +524,18 @@ class Delta(OpenAIObject):
         audio: Optional[ChatCompletionAudioResponse] = None,
         **params,
     ):
+        super(Delta, self).__init__(**params)
         provider_specific_fields: Dict[str, Any] = {}
         if "reasoning_content" in params:
             provider_specific_fields["reasoning_content"] = params["reasoning_content"]
-            del params["reasoning_content"]
-        super(Delta, self).__init__(**params)
+            setattr(self, "reasoning_content", params["reasoning_content"])
         self.content = content
         self.role = role
-        self.provider_specific_fields = provider_specific_fields
         # Set default values and correct types
         self.function_call: Optional[Union[FunctionCall, Any]] = None
         self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None
         self.audio: Optional[ChatCompletionAudioResponse] = None
 
         if provider_specific_fields:  # set if provider_specific_fields is not empty
             self.provider_specific_fields = provider_specific_fields
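Same idea on the streaming side: Delta now sets reasoning_content via setattr (and calls the parent constructor up front) instead of deleting the key, so a reasoning-only chunk exposes the field directly. A sketch (assumes the parent class tolerates the extra kwarg, which the diff implies since **params still contains reasoning_content when passed through):

from litellm.types.utils import Delta

# deepseek-r1 style streaming delta: reasoning arrives before any content
delta = Delta(content=None, reasoning_content="Let me think...")
print(delta.reasoning_content)         # top-level attribute (new)
print(delta.provider_specific_fields)  # {'reasoning_content': 'Let me think...'}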
@@ -801,6 +803,7 @@ class StreamingChatCompletionChunk(OpenAIChatCompletionChunk):
             new_choice = StreamingChoices(**choice).model_dump()
             new_choices.append(new_choice)
         kwargs["choices"] = new_choices
 
         super().__init__(**kwargs)
+
 
@@ -97,6 +97,14 @@ model_list:
       rpm: 1000
     model_info:
       health_check_timeout: 1
+  - model_name: good-model
+    litellm_params:
+      model: openai/bad-model
+      api_key: os.environ/OPENAI_API_KEY
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      rpm: 1000
+    model_info:
+      health_check_timeout: 1
   - model_name: "*"
     litellm_params:
       model: openai/*
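For context, a minimal way to hit the routes this config adds, against a locally running proxy (a sketch; assumes the proxy listens on :4000 with master key sk-1234, matching the test at the bottom of this diff):

from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
resp = client.chat.completions.create(
    model="good-model",  # routed per the model_list entry above
    messages=[{"role": "user", "content": "ping"}],
)
print(resp.choices[0].message.content)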
@@ -4546,10 +4546,7 @@ def test_deepseek_reasoning_content_completion():
         messages=[{"role": "user", "content": "Tell me a joke."}],
     )
 
-    assert (
-        resp.choices[0].message.provider_specific_fields["reasoning_content"]
-        is not None
-    )
+    assert resp.choices[0].message.reasoning_content is not None
 
 
 @pytest.mark.parametrize(
@@ -4066,7 +4066,7 @@ def test_mock_response_iterator_tool_use():
 
 
 def test_deepseek_reasoning_content_completion():
-    litellm.set_verbose = True
+    # litellm.set_verbose = True
     resp = litellm.completion(
         model="deepseek/deepseek-reasoner",
         messages=[{"role": "user", "content": "Tell me a joke."}],
@@ -4076,8 +4076,7 @@ def test_deepseek_reasoning_content_completion():
     reasoning_content_exists = False
     for chunk in resp:
         print(f"chunk: {chunk}")
-        if chunk.choices[0].delta.content is not None:
-            if "reasoning_content" in chunk.choices[0].delta.provider_specific_fields:
-                reasoning_content_exists = True
-                break
+        if chunk.choices[0].delta.reasoning_content is not None:
+            reasoning_content_exists = True
+            break
 
     assert reasoning_content_exists
@@ -228,3 +228,52 @@ async def test_chat_completion_client_fallbacks_with_custom_message(has_access):
     except Exception as e:
         if has_access:
             pytest.fail("Expected this to work: {}".format(str(e)))
+
+
+import asyncio
+from openai import AsyncOpenAI
+from typing import List
+import time
+
+
+async def make_request(client: AsyncOpenAI, model: str) -> bool:
+    try:
+        await client.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": "Who was Alexander?"}],
+        )
+        return True
+    except Exception as e:
+        print(f"Error with {model}: {str(e)}")
+        return False
+
+
+async def run_good_model_test(client: AsyncOpenAI, num_requests: int) -> bool:
+    tasks = [make_request(client, "good-model") for _ in range(num_requests)]
+    good_results = await asyncio.gather(*tasks)
+    return all(good_results)
+
+
+@pytest.mark.asyncio
+async def test_chat_completion_bad_and_good_model():
+    """
+    Prod test - ensure even if bad model is down, good model is still working.
+    """
+    client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
+    num_requests = 100
+    num_iterations = 3
+
+    for iteration in range(num_iterations):
+        print(f"\nIteration {iteration + 1}/{num_iterations}")
+        start_time = time.time()
+
+        # Fire and forget bad model requests
+        for _ in range(num_requests):
+            asyncio.create_task(make_request(client, "bad-model"))
+
+        # Wait only for good model requests
+        success = await run_good_model_test(client, num_requests)
+        print(
+            f"Iteration {iteration + 1}: {'✓' if success else '✗'} ({time.time() - start_time:.2f}s)"
+        )
+        assert success, "Not all good model requests succeeded"