Litellm dev 01 25 2025 p4 (#8006)

* feat(main.py): use asyncio.sleep for mock_timeout=true on async requests

adds unit testing to ensure the proxy does not fail if specific OpenAI requests hang (e.g. the recent o1 outage)

* fix(streaming_handler.py): fix deepseek r1 return reasoning content on streaming

Fixes https://github.com/BerriAI/litellm/issues/7942

* Revert "fix(streaming_handler.py): fix deepseek r1 return reasoning content on streaming"

This reverts commit 7a052a64e3.

* fix(deepseek-r-1): return reasoning_content as a top-level param

ensures compatibility with existing tools that use it

* fix: fix linting error
Krish Dholakia 2025-01-26 08:01:05 -08:00 committed by GitHub
parent 03eef5a2a0
commit 6bafdbc546
8 changed files with 108 additions and 19 deletions

View file

@@ -471,6 +471,7 @@ class CustomStreamWrapper:
finish_reason = None
logprobs = None
usage = None
if str_line and str_line.choices and len(str_line.choices) > 0:
if (
str_line.choices[0].delta is not None
@@ -750,6 +751,7 @@ class CustomStreamWrapper:
"function_call" in completion_obj
and completion_obj["function_call"] is not None
)
or (model_response.choices[0].delta.provider_specific_fields is not None)
or (
"provider_specific_fields" in response_obj
and response_obj["provider_specific_fields"] is not None
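
The effect of the added check, as a minimal sketch (the helper below is illustrative and not part of the diff): a streaming delta that carries only provider-specific fields, such as deepseek-r1's reasoning_content, is no longer treated as an empty chunk.

from typing import Any, Dict, Optional

def chunk_has_payload(
    content: Optional[str],
    provider_specific_fields: Optional[Dict[str, Any]],
) -> bool:
    # Mirrors the extended condition: a delta counts as non-empty if it has
    # regular content OR any provider-specific fields (e.g. reasoning_content).
    return content is not None or provider_specific_fields is not None

# A "thinking" chunk with no content yet should still be yielded to the caller.
assert chunk_has_payload(None, {"reasoning_content": "Let me think..."}) is True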

View file

@@ -383,6 +383,10 @@ async def acompletion(
- If `stream` is True, the function returns an async generator that yields completion lines.
"""
fallbacks = kwargs.get("fallbacks", None)
mock_timeout = kwargs.get("mock_timeout", None)
if mock_timeout is True:
await _handle_mock_timeout_async(mock_timeout, timeout, model)
loop = asyncio.get_event_loop()
custom_llm_provider = kwargs.get("custom_llm_provider", None)
@@ -565,12 +569,7 @@ def _handle_mock_timeout(
model: str,
):
if mock_timeout is True and timeout is not None:
if isinstance(timeout, float):
time.sleep(timeout)
elif isinstance(timeout, str):
time.sleep(float(timeout))
elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None:
time.sleep(timeout.connect)
_sleep_for_timeout(timeout)
raise litellm.Timeout(
message="This is a mock timeout error",
llm_provider="openai",
@@ -578,6 +577,38 @@
)
async def _handle_mock_timeout_async(
mock_timeout: Optional[bool],
timeout: Optional[Union[float, str, httpx.Timeout]],
model: str,
):
if mock_timeout is True and timeout is not None:
await _sleep_for_timeout_async(timeout)
raise litellm.Timeout(
message="This is a mock timeout error",
llm_provider="openai",
model=model,
)
def _sleep_for_timeout(timeout: Union[float, str, httpx.Timeout]):
if isinstance(timeout, float):
time.sleep(timeout)
elif isinstance(timeout, str):
time.sleep(float(timeout))
elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None:
time.sleep(timeout.connect)
async def _sleep_for_timeout_async(timeout: Union[float, str, httpx.Timeout]):
if isinstance(timeout, float):
await asyncio.sleep(timeout)
elif isinstance(timeout, str):
await asyncio.sleep(float(timeout))
elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None:
await asyncio.sleep(timeout.connect)
def mock_completion(
model: str,
messages: List,
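
A minimal sketch of how the new async path can be exercised (the model name and timeout value are illustrative; mock_timeout and litellm.Timeout come from the diff above). Because the sleep is now asyncio.sleep rather than time.sleep, other requests on the same event loop keep running while the mock timeout elapses:

import asyncio

import litellm

async def main():
    try:
        # mock_timeout=True sleeps asynchronously for `timeout` seconds and then
        # raises litellm.Timeout, without sending a request to any provider.
        await litellm.acompletion(
            model="gpt-4o",  # illustrative; no real call is made
            messages=[{"role": "user", "content": "hi"}],
            mock_timeout=True,
            timeout=0.5,
        )
    except litellm.Timeout as err:
        print(f"Got expected mock timeout: {err}")

asyncio.run(main())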

View file

@@ -15,4 +15,4 @@ model_list:
model: anthropic.claude-3-sonnet-20240229-v1:0
litellm_settings:
callbacks: ["langsmith"]
callbacks: ["langsmith"]

View file

@@ -487,6 +487,8 @@ class Message(OpenAIObject):
if provider_specific_fields: # set if provider_specific_fields is not empty
self.provider_specific_fields = provider_specific_fields
for k, v in provider_specific_fields.items():
setattr(self, k, v)
def get(self, key, default=None):
# Custom .get() method to access attributes with a default value if the attribute doesn't exist
@@ -522,18 +524,18 @@ class Delta(OpenAIObject):
audio: Optional[ChatCompletionAudioResponse] = None,
**params,
):
super(Delta, self).__init__(**params)
provider_specific_fields: Dict[str, Any] = {}
if "reasoning_content" in params:
provider_specific_fields["reasoning_content"] = params["reasoning_content"]
del params["reasoning_content"]
super(Delta, self).__init__(**params)
setattr(self, "reasoning_content", params["reasoning_content"])
self.content = content
self.role = role
self.provider_specific_fields = provider_specific_fields
# Set default values and correct types
self.function_call: Optional[Union[FunctionCall, Any]] = None
self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None
self.audio: Optional[ChatCompletionAudioResponse] = None
if provider_specific_fields: # set if provider_specific_fields is not empty
self.provider_specific_fields = provider_specific_fields
@@ -801,6 +803,7 @@ class StreamingChatCompletionChunk(OpenAIChatCompletionChunk):
new_choice = StreamingChoices(**choice).model_dump()
new_choices.append(new_choice)
kwargs["choices"] = new_choices
super().__init__(**kwargs)
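
A minimal sketch of what the Message/Delta change exposes, assuming Delta is importable from litellm.types.utils (where these classes are defined): reasoning_content is now readable as a top-level attribute while still being mirrored under provider_specific_fields for existing tools.

from litellm.types.utils import Delta

# Illustrative value; in practice this comes from a deepseek-r1 streaming chunk.
delta = Delta(content=None, reasoning_content="Consider a pun about atoms...")

# Top-level attribute (the compatibility fix in this commit):
print(delta.reasoning_content)

# Still available under provider_specific_fields:
print(delta.provider_specific_fields["reasoning_content"])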

View file

@@ -97,6 +97,14 @@ model_list:
rpm: 1000
model_info:
health_check_timeout: 1
- model_name: good-model
litellm_params:
model: openai/bad-model
api_key: os.environ/OPENAI_API_KEY
api_base: https://exampleopenaiendpoint-production.up.railway.app/
rpm: 1000
model_info:
health_check_timeout: 1
- model_name: "*"
litellm_params:
model: openai/*
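
A minimal way to exercise this config by hand, assuming the proxy is running locally on port 4000 with master key sk-1234 (the same values the prod test below uses): good-model should keep answering even while bad-model requests hang or fail.

import asyncio

from openai import AsyncOpenAI

async def main():
    client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
    resp = await client.chat.completions.create(
        model="good-model",
        messages=[{"role": "user", "content": "ping"}],
    )
    print(resp.choices[0].message.content)

asyncio.run(main())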

View file

@@ -4546,10 +4546,7 @@ def test_deepseek_reasoning_content_completion():
messages=[{"role": "user", "content": "Tell me a joke."}],
)
assert (
resp.choices[0].message.provider_specific_fields["reasoning_content"]
is not None
)
assert resp.choices[0].message.reasoning_content is not None
@pytest.mark.parametrize(

View file

@@ -4066,7 +4066,7 @@ def test_mock_response_iterator_tool_use():
def test_deepseek_reasoning_content_completion():
litellm.set_verbose = True
# litellm.set_verbose = True
resp = litellm.completion(
model="deepseek/deepseek-reasoner",
messages=[{"role": "user", "content": "Tell me a joke."}],
@@ -4076,8 +4076,7 @@ def test_deepseek_reasoning_content_completion():
reasoning_content_exists = False
for chunk in resp:
print(f"chunk: {chunk}")
if chunk.choices[0].delta.content is not None:
if "reasoning_content" in chunk.choices[0].delta.provider_specific_fields:
reasoning_content_exists = True
break
if chunk.choices[0].delta.reasoning_content is not None:
reasoning_content_exists = True
break
assert reasoning_content_exists

View file

@@ -228,3 +228,52 @@ async def test_chat_completion_client_fallbacks_with_custom_message(has_access):
except Exception as e:
if has_access:
pytest.fail("Expected this to work: {}".format(str(e)))
import asyncio
from openai import AsyncOpenAI
from typing import List
import time
async def make_request(client: AsyncOpenAI, model: str) -> bool:
try:
await client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "Who was Alexander?"}],
)
return True
except Exception as e:
print(f"Error with {model}: {str(e)}")
return False
async def run_good_model_test(client: AsyncOpenAI, num_requests: int) -> bool:
tasks = [make_request(client, "good-model") for _ in range(num_requests)]
good_results = await asyncio.gather(*tasks)
return all(good_results)
@pytest.mark.asyncio
async def test_chat_completion_bad_and_good_model():
"""
Prod test - ensure even if bad model is down, good model is still working.
"""
client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
num_requests = 100
num_iterations = 3
for iteration in range(num_iterations):
print(f"\nIteration {iteration + 1}/{num_iterations}")
start_time = time.time()
# Fire and forget bad model requests
for _ in range(num_requests):
asyncio.create_task(make_request(client, "bad-model"))
# Wait only for good model requests
success = await run_good_model_test(client, num_requests)
print(
f"Iteration {iteration + 1}: {'' if success else ''} ({time.time() - start_time:.2f}s)"
)
assert success, "Not all good model requests succeeded"