diff --git a/litellm/__init__.py b/litellm/__init__.py
index 9ca1517c92..d66707f8b3 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -277,8 +277,6 @@ disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
 custom_prometheus_metadata_labels: List[str] = []
 #### REQUEST PRIORITIZATION ####
 priority_reservation: Optional[Dict[str, float]] = None
-
-
 force_ipv4: bool = (
     False  # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
 )
diff --git a/litellm/litellm_core_utils/get_litellm_params.py b/litellm/litellm_core_utils/get_litellm_params.py
index c0fbb1cb97..cf62375f33 100644
--- a/litellm/litellm_core_utils/get_litellm_params.py
+++ b/litellm/litellm_core_utils/get_litellm_params.py
@@ -57,6 +57,7 @@ def get_litellm_params(
     prompt_variables: Optional[dict] = None,
     async_call: Optional[bool] = None,
     ssl_verify: Optional[bool] = None,
+    merge_reasoning_content_in_choices: Optional[bool] = None,
     **kwargs,
 ) -> dict:
     litellm_params = {
@@ -97,5 +98,6 @@
         "prompt_variables": prompt_variables,
         "async_call": async_call,
         "ssl_verify": ssl_verify,
+        "merge_reasoning_content_in_choices": merge_reasoning_content_in_choices,
     }
     return litellm_params
diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py
index 4ce27bfeca..2c7af8d5ba 100644
--- a/litellm/litellm_core_utils/streaming_handler.py
+++ b/litellm/litellm_core_utils/streaming_handler.py
@@ -15,6 +15,7 @@ from litellm import verbose_logger
 from litellm.litellm_core_utils.redact_messages import LiteLLMLoggingObject
 from litellm.litellm_core_utils.thread_pool_executor import executor
 from litellm.types.llms.openai import ChatCompletionChunk
+from litellm.types.router import GenericLiteLLMParams
 from litellm.types.utils import Delta
 from litellm.types.utils import GenericStreamingChunk as GChunk
 from litellm.types.utils import (
@@ -70,6 +71,17 @@ class CustomStreamWrapper:
         self.completion_stream = completion_stream
         self.sent_first_chunk = False
         self.sent_last_chunk = False
+
+        litellm_params: GenericLiteLLMParams = GenericLiteLLMParams(
+            **self.logging_obj.model_call_details.get("litellm_params", {})
+        )
+        self.merge_reasoning_content_in_choices: bool = (
+            litellm_params.merge_reasoning_content_in_choices or False
+        )
+        self.sent_first_thinking_block = False
+        self.sent_last_thinking_block = False
+        self.thinking_content = ""
+
         self.system_fingerprint: Optional[str] = None
         self.received_finish_reason: Optional[str] = None
         self.intermittent_finish_reason: Optional[str] = (
@@ -87,12 +99,7 @@
         self.holding_chunk = ""
         self.complete_response = ""
         self.response_uptil_now = ""
-        _model_info = (
-            self.logging_obj.model_call_details.get("litellm_params", {}).get(
-                "model_info", {}
-            )
-            or {}
-        )
+        _model_info: Dict = litellm_params.model_info or {}

         _api_base = get_api_base(
             model=model or "",
@@ -873,6 +880,10 @@
                 _index: Optional[int] = completion_obj.get("index")
                 if _index is not None:
                     model_response.choices[0].index = _index
+
+                self._optional_combine_thinking_block_in_choices(
+                    model_response=model_response
+                )
                 print_verbose(f"returning model_response: {model_response}")
                 return model_response
             else:
@@ -929,6 +940,48 @@
             self.chunks.append(model_response)
         return

+    def _optional_combine_thinking_block_in_choices(
+        self, model_response: ModelResponseStream
+    ) -> None:
+        """
+        UIs like OpenWebUI expect to get one chunk with <think>...</think> tags in the chunk content.
+
+        In-place updates the model_response object, merging reasoning_content into content wrapped in <think>...</think> tags.
+
+        Enabled when `merge_reasoning_content_in_choices=True` is passed in request params.
+        """
+        if self.merge_reasoning_content_in_choices is True:
+            reasoning_content = getattr(
+                model_response.choices[0].delta, "reasoning_content", None
+            )
+            if reasoning_content:
+                if self.sent_first_thinking_block is False:
+                    model_response.choices[0].delta.content += (
+                        "<think>" + reasoning_content
+                    )
+                    self.sent_first_thinking_block = True
+                elif (
+                    self.sent_first_thinking_block is True
+                    and hasattr(model_response.choices[0].delta, "reasoning_content")
+                    and model_response.choices[0].delta.reasoning_content
+                ):
+                    model_response.choices[0].delta.content = reasoning_content
+            elif (
+                self.sent_first_thinking_block is True
+                and not self.sent_last_thinking_block
+                and model_response.choices[0].delta.content
+            ):
+                model_response.choices[0].delta.content = (
+                    "</think>" + model_response.choices[0].delta.content
+                )
+                self.sent_last_thinking_block = True
+
+            if hasattr(model_response.choices[0].delta, "reasoning_content"):
+                del model_response.choices[0].delta.reasoning_content
+        return
+
     def chunk_creator(self, chunk: Any):  # type: ignore  # noqa: PLR0915
         model_response = self.model_response_creator()
         response_obj: Dict[str, Any] = {}
diff --git a/litellm/main.py b/litellm/main.py
index 28dbf45102..1699e79cf7 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1159,6 +1159,9 @@ def completion(  # type: ignore # noqa: PLR0915
         prompt_id=prompt_id,
         prompt_variables=prompt_variables,
         ssl_verify=ssl_verify,
+        merge_reasoning_content_in_choices=kwargs.get(
+            "merge_reasoning_content_in_choices", None
+        ),
     )
     logging.update_environment_variables(
         model=model,
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index eef4a55ed3..258eef6307 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,27 +1,13 @@
 model_list:
-  - model_name: fake-openai-endpoint
+  - model_name: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
     litellm_params:
-      model: openai/my-fake-model
-      api_key: my-fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-  - model_name: claude-special-alias
-    litellm_params:
-      model: anthropic/claude-3-haiku-20240307
-      api_key: os.environ/ANTHROPIC_API_KEY
-  - model_name: claude-3-5-sonnet-20241022
-    litellm_params:
-      model: anthropic/claude-3-5-sonnet-20241022
-      api_key: os.environ/ANTHROPIC_API_KEY
-  - model_name: claude-3-7-sonnet-20250219
-    litellm_params:
-      model: anthropic/claude-3-7-sonnet-20250219
-      api_key: os.environ/ANTHROPIC_API_KEY
-  - model_name: anthropic/*
-    litellm_params:
-      model: anthropic/*
-      api_key: os.environ/ANTHROPIC_API_KEY
+
+      model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
+      thinking: {"type": "enabled", "budget_tokens": 1024}
+      max_tokens: 1080
+      merge_reasoning_content_in_choices: true
+
 general_settings:
   store_model_in_db: true
   store_prompts_in_spend_logs: true
-
diff --git a/litellm/types/router.py b/litellm/types/router.py
index e2c92783da..9a5fb168da 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -192,6 +192,8 @@ class GenericLiteLLMParams(BaseModel):
     budget_duration: Optional[str] = None
     use_in_pass_through: Optional[bool] = False
     model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
+    merge_reasoning_content_in_choices: Optional[bool] = False
+    model_info: Optional[Dict] = None

     def __init__(
         self,
@@ -231,6 +233,9 @@ class GenericLiteLLMParams(BaseModel):
         budget_duration: Optional[str] = None,
         # Pass through params
         use_in_pass_through: Optional[bool] = False,
+        # This will merge the reasoning content in the choices
+        merge_reasoning_content_in_choices: Optional[bool] = False,
+        model_info: Optional[Dict] = None,
         **params,
     ):
         args = locals()
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 894ef70933..5d1bef2762 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -21,6 +21,8 @@ from openai.types.moderation_create_response import Moderation, ModerationCreate
 from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
 from typing_extensions import Callable, Dict, Required, TypedDict, override

+import litellm
+
 from ..litellm_core_utils.core_helpers import map_finish_reason
 from .guardrails import GuardrailEventHooks
 from .llms.openai import (
@@ -1803,6 +1805,7 @@ all_litellm_params = [
     "max_budget",
     "budget_duration",
     "use_in_pass_through",
+    "merge_reasoning_content_in_choices",
 ] + list(StandardCallbackDynamicParams.__annotations__.keys())
diff --git a/tests/litellm/litellm_core_utils/test_streaming_handler.py b/tests/litellm/litellm_core_utils/test_streaming_handler.py
index 7595c19155..54d178e3ac 100644
--- a/tests/litellm/litellm_core_utils/test_streaming_handler.py
+++ b/tests/litellm/litellm_core_utils/test_streaming_handler.py
@@ -46,3 +46,213 @@ def test_is_chunk_non_empty(initialized_custom_stream_wrapper: CustomStreamWrapp
         model_response=ModelResponseStream(**chunk),
         response_obj=MagicMock(),
     )
+
+
+def test_optional_combine_thinking_block_in_choices(
+    initialized_custom_stream_wrapper: CustomStreamWrapper,
+):
+    """Test that reasoning_content is properly combined with content using <think> tags"""
+    # Setup the wrapper to use the merge feature
+    initialized_custom_stream_wrapper.merge_reasoning_content_in_choices = True
+
+    # First chunk with reasoning_content - should add <think> tag
+    first_chunk = {
+        "id": "chunk1",
+        "object": "chat.completion.chunk",
+        "created": 1741037890,
+        "model": "deepseek-reasoner",
+        "choices": [
+            {
+                "index": 0,
+                "delta": {
+                    "content": "",
+                    "reasoning_content": "Let me think about this",
+                },
+                "finish_reason": None,
+            }
+        ],
+    }
+
+    # Middle chunk with more reasoning_content
+    middle_chunk = {
+        "id": "chunk2",
+        "object": "chat.completion.chunk",
+        "created": 1741037891,
+        "model": "deepseek-reasoner",
+        "choices": [
+            {
+                "index": 0,
+                "delta": {"content": "", "reasoning_content": " step by step"},
+                "finish_reason": None,
+            }
+        ],
+    }
+
+    # Final chunk with actual content - should add </think> tag
+    final_chunk = {
+        "id": "chunk3",
+        "object": "chat.completion.chunk",
+        "created": 1741037892,
+        "model": "deepseek-reasoner",
+        "choices": [
+            {
+                "index": 0,
+                "delta": {"content": "The answer is 42", "reasoning_content": None},
+                "finish_reason": None,
+            }
+        ],
+    }
+
+    # Process first chunk
+    first_response = ModelResponseStream(**first_chunk)
+    initialized_custom_stream_wrapper._optional_combine_thinking_block_in_choices(
+        first_response
+    )
+    print("first_response", json.dumps(first_response, indent=4, default=str))
+    assert first_response.choices[0].delta.content == "<think>Let me think about this"
+    # assert the response does not have attribute reasoning_content
+    assert not hasattr(first_response.choices[0].delta, "reasoning_content")
+
+    assert initialized_custom_stream_wrapper.sent_first_thinking_block is True
+
+    # Process middle chunk
+    middle_response = ModelResponseStream(**middle_chunk)
+    initialized_custom_stream_wrapper._optional_combine_thinking_block_in_choices(
+        middle_response
+    )
+    print("middle_response", json.dumps(middle_response, indent=4, default=str))
+    assert middle_response.choices[0].delta.content == " step by step"
+    assert not hasattr(middle_response.choices[0].delta, "reasoning_content")
+
+    # Process final chunk
+    final_response = ModelResponseStream(**final_chunk)
+    initialized_custom_stream_wrapper._optional_combine_thinking_block_in_choices(
+        final_response
+    )
+    print("final_response", json.dumps(final_response, indent=4, default=str))
+    assert final_response.choices[0].delta.content == "</think>The answer is 42"
+    assert initialized_custom_stream_wrapper.sent_last_thinking_block is True
+    assert not hasattr(final_response.choices[0].delta, "reasoning_content")
+
+
+def test_multi_chunk_reasoning_and_content(
+    initialized_custom_stream_wrapper: CustomStreamWrapper,
+):
+    """Test handling of multiple reasoning chunks followed by multiple content chunks"""
+    # Setup the wrapper to use the merge feature
+    initialized_custom_stream_wrapper.merge_reasoning_content_in_choices = True
+    initialized_custom_stream_wrapper.sent_first_thinking_block = False
+    initialized_custom_stream_wrapper.sent_last_thinking_block = False
+
+    # Create test chunks
+    chunks = [
+        # Chunk 1: First reasoning chunk
+        {
+            "id": "chunk1",
+            "object": "chat.completion.chunk",
+            "created": 1741037890,
+            "model": "deepseek-reasoner",
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {
+                        "content": "",
+                        "reasoning_content": "To solve this problem",
+                    },
+                    "finish_reason": None,
+                }
+            ],
+        },
+        # Chunk 2: Second reasoning chunk
+        {
+            "id": "chunk2",
+            "object": "chat.completion.chunk",
+            "created": 1741037891,
+            "model": "deepseek-reasoner",
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {
+                        "content": "",
+                        "reasoning_content": ", I need to calculate 6 * 7",
+                    },
+                    "finish_reason": None,
+                }
+            ],
+        },
+        # Chunk 3: Third reasoning chunk
+        {
+            "id": "chunk3",
+            "object": "chat.completion.chunk",
+            "created": 1741037892,
+            "model": "deepseek-reasoner",
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {"content": "", "reasoning_content": " which equals 42"},
+                    "finish_reason": None,
+                }
+            ],
+        },
+        # Chunk 4: First content chunk (transition from reasoning to content)
+        {
+            "id": "chunk4",
+            "object": "chat.completion.chunk",
+            "created": 1741037893,
+            "model": "deepseek-reasoner",
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {
+                        "content": "The answer to your question",
+                        "reasoning_content": None,
+                    },
+                    "finish_reason": None,
+                }
+            ],
+        },
+        # Chunk 5: Second content chunk
+        {
+            "id": "chunk5",
+            "object": "chat.completion.chunk",
+            "created": 1741037894,
+            "model": "deepseek-reasoner",
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {"content": " is 42.", "reasoning_content": None},
+                    "finish_reason": None,
+                }
+            ],
+        },
+    ]
+
+    # Expected content after processing each chunk
+    expected_contents = [
+        "<think>To solve this problem",
+        ", I need to calculate 6 * 7",
+        " which equals 42",
+        "</think>The answer to your question",
+        " is 42.",
+    ]
+
+    # Process each chunk and verify results
+    for i, (chunk, expected_content) in enumerate(zip(chunks, expected_contents)):
+        response = ModelResponseStream(**chunk)
+        initialized_custom_stream_wrapper._optional_combine_thinking_block_in_choices(
+            response
+        )
+
+        # Check content
+        assert (
+            response.choices[0].delta.content == expected_content
+        ), f"Chunk {i+1}: content mismatch"
+
+        # Check reasoning_content was removed
+        assert not hasattr(
+            response.choices[0].delta, "reasoning_content"
+        ), f"Chunk {i+1}: reasoning_content should be removed"
+
+    # Verify final state
+    assert initialized_custom_stream_wrapper.sent_first_thinking_block is True
+    assert initialized_custom_stream_wrapper.sent_last_thinking_block is True
diff --git a/tests/llm_translation/test_bedrock_completion.py b/tests/llm_translation/test_bedrock_completion.py
index 660da72f24..cc8cc163d4 100644
--- a/tests/llm_translation/test_bedrock_completion.py
+++ b/tests/llm_translation/test_bedrock_completion.py
@@ -2841,3 +2841,72 @@ async def test_bedrock_thinking_in_assistant_message(sync_mode):
         "Alright, let's get started with resolving this issue about implementing"
         in json_data
     )
+
+
+@pytest.mark.asyncio
+async def test_bedrock_stream_thinking_content_openwebui():
+    """
+    When merge_reasoning_content_in_choices=True
+
+    The content should be collected as
+
+    ```
+    <think>
+    I am a helpful assistant, the user wants to know who I am
+    </think>
+
+    Hi I am Anthropic, I am a helpful assistant
+    ```
+    """
+    response = await litellm.acompletion(
+        model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
+        messages=[{"role": "user", "content": "Hello who is this?"}],
+        stream=True,
+        max_tokens=1080,
+        thinking={"type": "enabled", "budget_tokens": 1024},
+        merge_reasoning_content_in_choices=True,
+    )
+    content = ""
+    async for chunk in response:
+        content += chunk.choices[0].delta.content or ""
+
+        # OpenWebUI expects the reasoning_content to be removed, otherwise this will appear as duplicate thinking blocks
+        assert getattr(chunk.choices[0].delta, "reasoning_content", None) is None
+        print(chunk)
+
+    print("collected content", content)
+
+    # Assert that the content follows the expected format with exactly one thinking section
+    think_open_pos = content.find("<think>")
+    think_close_pos = content.find("</think>")
+
+    # Assert there's exactly one opening and closing tag
+    assert think_open_pos >= 0, "Opening <think> tag not found"
+    assert think_close_pos > 0, "Closing </think> tag not found"
+    assert (
+        content.count("<think>") == 1
+    ), "There should be exactly one opening <think> tag"
+    assert (
+        content.count("</think>") == 1
+    ), "There should be exactly one closing </think> tag"
+
+    # Assert the opening tag comes before the closing tag
+    assert (
+        think_open_pos < think_close_pos
+    ), "Opening <think> tag should come before closing </think> tag"
+
+    # Assert there's content between the tags
+    thinking_content = content[think_open_pos + 7 : think_close_pos]
+    assert (
+        len(thinking_content.strip()) > 0
+    ), "There should be content between thinking tags"
+
+    # Assert there's content after the closing tag
+    assert (
+        len(content) > think_close_pos + 8
+    ), "There should be content after the thinking tags"
+    response_content = content[think_close_pos + 8 :].strip()
+    assert (
+        len(response_content) > 0
+    ), "There should be non-empty content after thinking tags"