Litellm dev 01 25 2025 p4 (#8006)

* feat(main.py): use asyncio.sleep for mock_timeout=true on async requests

adds unit testing to ensure the proxy does not fail if specific OpenAI requests hang (e.g. the recent o1 outage)

* fix(streaming_handler.py): fix deepseek r1 return reasoning content on streaming

Fixes https://github.com/BerriAI/litellm/issues/7942

* Revert "fix(streaming_handler.py): fix deepseek r1 return reasoning content on streaming"

This reverts commit 7a052a64e3.

* fix(deepseek-r-1): return reasoning_content as a top-level param

ensures compatibility with existing tools that use it

* fix: fix linting error
Krish Dholakia 2025-01-26 08:01:05 -08:00 committed by GitHub
parent 03eef5a2a0
commit 6bafdbc546
8 changed files with 108 additions and 19 deletions

View file

@@ -471,6 +471,7 @@ class CustomStreamWrapper:
finish_reason = None
logprobs = None
usage = None
if str_line and str_line.choices and len(str_line.choices) > 0:
if (
str_line.choices[0].delta is not None
@@ -750,6 +751,7 @@ class CustomStreamWrapper:
"function_call" in completion_obj
and completion_obj["function_call"] is not None
)
or (model_response.choices[0].delta.provider_specific_fields is not None)
or (
"provider_specific_fields" in response_obj
and response_obj["provider_specific_fields"] is not None
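
The effect of the added check, as a minimal sketch (the helper below is illustrative and not part of the diff): a streaming delta that carries only provider-specific fields, such as deepseek-r1's reasoning_content, is no longer treated as an empty chunk.

from typing import Any, Dict, Optional

def chunk_has_payload(
    content: Optional[str],
    provider_specific_fields: Optional[Dict[str, Any]],
) -> bool:
    # Mirrors the extended condition: a delta counts as non-empty if it has
    # regular content OR any provider-specific fields (e.g. reasoning_content).
    return content is not None or provider_specific_fields is not None

# A "thinking" chunk with no content yet should still be yielded to the caller.
assert chunk_has_payload(None, {"reasoning_content": "Let me think..."}) is True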

View file

@@ -383,6 +383,10 @@ async def acompletion(
- If `stream` is True, the function returns an async generator that yields completion lines.
"""
fallbacks = kwargs.get("fallbacks", None)
mock_timeout = kwargs.get("mock_timeout", None)
if mock_timeout is True:
await _handle_mock_timeout_async(mock_timeout, timeout, model)
loop = asyncio.get_event_loop()
custom_llm_provider = kwargs.get("custom_llm_provider", None)
@@ -565,12 +569,7 @@ def _handle_mock_timeout(
model: str,
):
if mock_timeout is True and timeout is not None:
if isinstance(timeout, float):
time.sleep(timeout)
elif isinstance(timeout, str):
time.sleep(float(timeout))
elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None:
time.sleep(timeout.connect)
_sleep_for_timeout(timeout)
raise litellm.Timeout(
message="This is a mock timeout error",
llm_provider="openai",
@@ -578,6 +577,38 @@
)
async def _handle_mock_timeout_async(
mock_timeout: Optional[bool],
timeout: Optional[Union[float, str, httpx.Timeout]],
model: str,
):
if mock_timeout is True and timeout is not None:
await _sleep_for_timeout_async(timeout)
raise litellm.Timeout(
message="This is a mock timeout error",
llm_provider="openai",
model=model,
)
def _sleep_for_timeout(timeout: Union[float, str, httpx.Timeout]):
if isinstance(timeout, float):
time.sleep(timeout)
elif isinstance(timeout, str):
time.sleep(float(timeout))
elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None:
time.sleep(timeout.connect)
async def _sleep_for_timeout_async(timeout: Union[float, str, httpx.Timeout]):
if isinstance(timeout, float):
await asyncio.sleep(timeout)
elif isinstance(timeout, str):
await asyncio.sleep(float(timeout))
elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None:
await asyncio.sleep(timeout.connect)
def mock_completion(
model: str,
messages: List,
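
A minimal sketch of how the new async path can be exercised (the model name and timeout value are illustrative; mock_timeout and litellm.Timeout come from the diff above). Because the sleep is now asyncio.sleep rather than time.sleep, other requests on the same event loop keep running while the mock timeout elapses:

import asyncio

import litellm

async def main():
    try:
        # mock_timeout=True sleeps asynchronously for `timeout` seconds and then
        # raises litellm.Timeout, without sending a request to any provider.
        await litellm.acompletion(
            model="gpt-4o",  # illustrative; no real call is made
            messages=[{"role": "user", "content": "hi"}],
            mock_timeout=True,
            timeout=0.5,
        )
    except litellm.Timeout as err:
        print(f"Got expected mock timeout: {err}")

asyncio.run(main())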

View file

@@ -15,4 +15,4 @@ model_list:
model: anthropic.claude-3-sonnet-20240229-v1:0
litellm_settings:
callbacks: ["langsmith"]
callbacks: ["langsmith"]

View file

@@ -487,6 +487,8 @@ class Message(OpenAIObject):
if provider_specific_fields: # set if provider_specific_fields is not empty
self.provider_specific_fields = provider_specific_fields
for k, v in provider_specific_fields.items():
setattr(self, k, v)
def get(self, key, default=None):
# Custom .get() method to access attributes with a default value if the attribute doesn't exist
@@ -522,18 +524,18 @@ class Delta(OpenAIObject):
audio: Optional[ChatCompletionAudioResponse] = None,
**params,
):
super(Delta, self).__init__(**params)
provider_specific_fields: Dict[str, Any] = {}
if "reasoning_content" in params:
provider_specific_fields["reasoning_content"] = params["reasoning_content"]
del params["reasoning_content"]
super(Delta, self).__init__(**params)
setattr(self, "reasoning_content", params["reasoning_content"])
self.content = content
self.role = role
self.provider_specific_fields = provider_specific_fields
# Set default values and correct types
self.function_call: Optional[Union[FunctionCall, Any]] = None
self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None
self.audio: Optional[ChatCompletionAudioResponse] = None
if provider_specific_fields: # set if provider_specific_fields is not empty
self.provider_specific_fields = provider_specific_fields
@@ -801,6 +803,7 @@ class StreamingChatCompletionChunk(OpenAIChatCompletionChunk):
new_choice = StreamingChoices(**choice).model_dump()
new_choices.append(new_choice)
kwargs["choices"] = new_choices
super().__init__(**kwargs)
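
A minimal sketch of what the Message/Delta change exposes, assuming Delta is importable from litellm.types.utils (where these classes are defined): reasoning_content is now readable as a top-level attribute while still being mirrored under provider_specific_fields for existing tools.

from litellm.types.utils import Delta

# Illustrative value; in practice this comes from a deepseek-r1 streaming chunk.
delta = Delta(content=None, reasoning_content="Consider a pun about atoms...")

# Top-level attribute (the compatibility fix in this commit):
print(delta.reasoning_content)

# Still available under provider_specific_fields:
print(delta.provider_specific_fields["reasoning_content"])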

View file

@@ -97,6 +97,14 @@ model_list:
rpm: 1000
model_info:
health_check_timeout: 1
- model_name: good-model
litellm_params:
model: openai/bad-model
api_key: os.environ/OPENAI_API_KEY
api_base: https://exampleopenaiendpoint-production.up.railway.app/
rpm: 1000
model_info:
health_check_timeout: 1
- model_name: "*"
litellm_params:
model: openai/*
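
A minimal way to exercise this config by hand, assuming the proxy is running locally on port 4000 with master key sk-1234 (the same values the prod test below uses): good-model should keep answering even while bad-model requests hang or fail.

import asyncio

from openai import AsyncOpenAI

async def main():
    client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
    resp = await client.chat.completions.create(
        model="good-model",
        messages=[{"role": "user", "content": "ping"}],
    )
    print(resp.choices[0].message.content)

asyncio.run(main())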

View file

@@ -4546,10 +4546,7 @@ def test_deepseek_reasoning_content_completion():
messages=[{"role": "user", "content": "Tell me a joke."}],
)
assert (
resp.choices[0].message.provider_specific_fields["reasoning_content"]
is not None
)
assert resp.choices[0].message.reasoning_content is not None
@pytest.mark.parametrize(

View file

@@ -4066,7 +4066,7 @@ def test_mock_response_iterator_tool_use():
def test_deepseek_reasoning_content_completion():
litellm.set_verbose = True
# litellm.set_verbose = True
resp = litellm.completion(
model="deepseek/deepseek-reasoner",
messages=[{"role": "user", "content": "Tell me a joke."}],
@@ -4076,8 +4076,7 @@ def test_deepseek_reasoning_content_completion():
reasoning_content_exists = False
for chunk in resp:
print(f"chunk: {chunk}")
if chunk.choices[0].delta.content is not None:
if "reasoning_content" in chunk.choices[0].delta.provider_specific_fields:
reasoning_content_exists = True
break
if chunk.choices[0].delta.reasoning_content is not None:
reasoning_content_exists = True
break
assert reasoning_content_exists

View file

@@ -228,3 +228,52 @@ async def test_chat_completion_client_fallbacks_with_custom_message(has_access):
except Exception as e:
if has_access:
pytest.fail("Expected this to work: {}".format(str(e)))
import asyncio
from openai import AsyncOpenAI
from typing import List
import time
async def make_request(client: AsyncOpenAI, model: str) -> bool:
try:
await client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "Who was Alexander?"}],
)
return True
except Exception as e:
print(f"Error with {model}: {str(e)}")
return False
async def run_good_model_test(client: AsyncOpenAI, num_requests: int) -> bool:
tasks = [make_request(client, "good-model") for _ in range(num_requests)]
good_results = await asyncio.gather(*tasks)
return all(good_results)
@pytest.mark.asyncio
async def test_chat_completion_bad_and_good_model():
"""
Prod test - ensure even if bad model is down, good model is still working.
"""
client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
num_requests = 100
num_iterations = 3
for iteration in range(num_iterations):
print(f"\nIteration {iteration + 1}/{num_iterations}")
start_time = time.time()
# Fire and forget bad model requests
for _ in range(num_requests):
asyncio.create_task(make_request(client, "bad-model"))
# Wait only for good model requests
success = await run_good_model_test(client, num_requests)
print(
f"Iteration {iteration + 1}: {'' if success else ''} ({time.time() - start_time:.2f}s)"
)
assert success, "Not all good model requests succeeded"