diff --git a/docs/my-website/docs/completion/json_mode.md b/docs/my-website/docs/completion/json_mode.md
index a782bfb0a..51f76b7a6 100644
--- a/docs/my-website/docs/completion/json_mode.md
+++ b/docs/my-website/docs/completion/json_mode.md
@@ -75,6 +75,7 @@ Works for:
 - Google AI Studio - Gemini models
 - Vertex AI models (Gemini + Anthropic)
 - Bedrock Models
+- Anthropic API Models
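# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch) of the JSON mode documented
# above, exercised against an Anthropic model. Assumptions: ANTHROPIC_API_KEY
# is set in the environment, and "claude-3-5-sonnet-20240620" stands in for
# any Anthropic chat model routed through litellm.
# ---------------------------------------------------------------------------
import json

import litellm

response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "List three US state capitals as JSON."}],
    response_format={"type": "json_object"},
)

# With the handler changes below, the JSON payload lands in message.content
# (OpenAI format) rather than staying inside an Anthropic tool call.
print(json.loads(response.choices[0].message.content))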
diff --git a/litellm/llms/anthropic/chat/handler.py b/litellm/llms/anthropic/chat/handler.py
index 12194533c..2952d54d5 100644
--- a/litellm/llms/anthropic/chat/handler.py
+++ b/litellm/llms/anthropic/chat/handler.py
@@ -44,7 +44,9 @@ from litellm.types.llms.openai import (
     ChatCompletionToolCallFunctionChunk,
     ChatCompletionUsageBlock,
 )
-from litellm.types.utils import GenericStreamingChunk, PromptTokensDetailsWrapper
+from litellm.types.utils import GenericStreamingChunk
+from litellm.types.utils import Message as LitellmMessage
+from litellm.types.utils import PromptTokensDetailsWrapper
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
 
 from ...base import BaseLLM
@@ -94,6 +96,7 @@ async def make_call(
     messages: list,
     logging_obj,
     timeout: Optional[Union[float, httpx.Timeout]],
+    json_mode: bool,
 ) -> Tuple[Any, httpx.Headers]:
     if client is None:
         client = litellm.module_level_aclient
@@ -119,7 +122,9 @@ async def make_call(
         raise AnthropicError(status_code=500, message=str(e))
 
     completion_stream = ModelResponseIterator(
-        streaming_response=response.aiter_lines(), sync_stream=False
+        streaming_response=response.aiter_lines(),
+        sync_stream=False,
+        json_mode=json_mode,
     )
 
     # LOGGING
@@ -142,6 +147,7 @@ def make_sync_call(
     messages: list,
     logging_obj,
     timeout: Optional[Union[float, httpx.Timeout]],
+    json_mode: bool,
 ) -> Tuple[Any, httpx.Headers]:
     if client is None:
         client = litellm.module_level_client  # re-use a module level client
@@ -175,7 +181,7 @@ def make_sync_call(
         )
 
     completion_stream = ModelResponseIterator(
-        streaming_response=response.iter_lines(), sync_stream=True
+        streaming_response=response.iter_lines(), sync_stream=True, json_mode=json_mode
     )
 
     # LOGGING
@@ -270,11 +276,12 @@ class AnthropicChatCompletion(BaseLLM):
                     "arguments"
                 )
                 if json_mode_content_str is not None:
-                    args = json.loads(json_mode_content_str)
-                    values: Optional[dict] = args.get("values")
-                    if values is not None:
-                        _message = litellm.Message(content=json.dumps(values))
+                    _converted_message = self._convert_tool_response_to_message(
+                        tool_calls=tool_calls,
+                    )
+                    if _converted_message is not None:
                         completion_response["stop_reason"] = "stop"
+                        _message = _converted_message
             model_response.choices[0].message = _message  # type: ignore
         model_response._hidden_params["original_response"] = completion_response[
             "content"
@@ -318,6 +325,37 @@ class AnthropicChatCompletion(BaseLLM):
         model_response._hidden_params = _hidden_params
         return model_response
 
+    @staticmethod
+    def _convert_tool_response_to_message(
+        tool_calls: List[ChatCompletionToolCallChunk],
+    ) -> Optional[LitellmMessage]:
+        """
+        In JSON mode, the Anthropic API returns the JSON response as a tool call; convert it to a message to follow the OpenAI format.
+
+        """
+        ## HANDLE JSON MODE - Anthropic returns a single function call
+        json_mode_content_str: Optional[str] = tool_calls[0]["function"].get(
+            "arguments"
+        )
+        try:
+            if json_mode_content_str is not None:
+                args = json.loads(json_mode_content_str)
+                if (
+                    isinstance(args, dict)
+                    and (values := args.get("values")) is not None
+                ):
+                    _message = litellm.Message(content=json.dumps(values))
+                    return _message
+                else:
+                    # the `values` key is often missing from the tool response
+                    # relevant issue: https://github.com/BerriAI/litellm/issues/6741
+                    _message = litellm.Message(content=json.dumps(args))
+                    return _message
+        except json.JSONDecodeError:
+            # JSON decode errors do occur; return the original tool response string
+            return litellm.Message(content=json_mode_content_str)
+        return None
+
     async def acompletion_stream_function(
         self,
         model: str,
@@ -334,6 +372,7 @@
         stream,
         _is_function_call,
         data: dict,
+        json_mode: bool,
         optional_params=None,
         litellm_params=None,
         logger_fn=None,
@@ -350,6 +389,7 @@
             messages=messages,
             logging_obj=logging_obj,
             timeout=timeout,
+            json_mode=json_mode,
         )
         streamwrapper = CustomStreamWrapper(
             completion_stream=completion_stream,
@@ -501,6 +541,7 @@
                         optional_params=optional_params,
                         stream=stream,
                         _is_function_call=_is_function_call,
+                        json_mode=json_mode,
                         litellm_params=litellm_params,
                         logger_fn=logger_fn,
                         headers=headers,
@@ -548,6 +589,7 @@
                     messages=messages,
                     logging_obj=logging_obj,
                     timeout=timeout,
+                    json_mode=json_mode,
                 )
                 return CustomStreamWrapper(
                     completion_stream=completion_stream,
@@ -606,11 +648,14 @@
 
 
 class ModelResponseIterator:
-    def __init__(self, streaming_response, sync_stream: bool):
+    def __init__(
+        self, streaming_response, sync_stream: bool, json_mode: Optional[bool] = False
+    ):
         self.streaming_response = streaming_response
         self.response_iterator = self.streaming_response
         self.content_blocks: List[ContentBlockDelta] = []
         self.tool_index = -1
+        self.json_mode = json_mode
 
     def check_empty_tool_call_args(self) -> bool:
         """
@@ -772,6 +817,8 @@
                     status_code=500,  # it looks like Anthropic API does not return a status code in the chunk error - default to 500
                 )
 
+            text, tool_use = self._handle_json_mode_chunk(text=text, tool_use=tool_use)
+
             returned_chunk = GenericStreamingChunk(
                 text=text,
                 tool_use=tool_use,
@@ -786,6 +833,34 @@
         except json.JSONDecodeError:
             raise ValueError(f"Failed to decode JSON from chunk: {chunk}")
 
+    def _handle_json_mode_chunk(
+        self, text: str, tool_use: Optional[ChatCompletionToolCallChunk]
+    ) -> Tuple[str, Optional[ChatCompletionToolCallChunk]]:
+        """
+        If JSON mode is enabled, convert the tool call to a message.
+
+        Anthropic returns the JSON response as part of a tool call;
+        OpenAI returns it as part of the content. This method places it in the content.
+
+        Args:
+            text: str
+            tool_use: Optional[ChatCompletionToolCallChunk]
+        Returns:
+            Tuple[str, Optional[ChatCompletionToolCallChunk]]
+
+            text: the text to use in the content
+            tool_use: the ChatCompletionToolCallChunk to use in the chunk response
+        """
+        if self.json_mode is True and tool_use is not None:
+            message = AnthropicChatCompletion._convert_tool_response_to_message(
+                tool_calls=[tool_use]
+            )
+            if message is not None:
+                text = message.content or ""
+                tool_use = None
+
+        return text, tool_use
+
     # Sync iterator
     def __iter__(self):
         return self
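# ---------------------------------------------------------------------------
# Minimal sketch (not part of the patch) of the new streaming behavior: with
# json_mode=True the iterator moves Anthropic's json_tool_call arguments into
# plain text content. The chunk values below are invented for illustration.
# ---------------------------------------------------------------------------
from litellm.llms.anthropic.chat.handler import ModelResponseIterator
from litellm.types.llms.openai import ChatCompletionToolCallFunctionChunk
from litellm.types.utils import ChatCompletionToolCallChunk

iterator = ModelResponseIterator(
    streaming_response=iter([]), sync_stream=True, json_mode=True
)

tool_use = ChatCompletionToolCallChunk(
    id="toolu_123",  # hypothetical tool call id
    type="function",
    function=ChatCompletionToolCallFunctionChunk(
        name="json_tool_call",
        arguments='{"values": {"city": "San Francisco"}}',
    ),
    index=0,
)

# The tool call becomes content; the `values` wrapper is unwrapped.
text, tool_use = iterator._handle_json_mode_chunk(text="", tool_use=tool_use)
assert text == '{"city": "San Francisco"}'
assert tool_use is None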
diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py
index 1e8132195..955eed957 100644
--- a/tests/llm_translation/base_llm_unit_tests.py
+++ b/tests/llm_translation/base_llm_unit_tests.py
@@ -48,6 +48,9 @@ class BaseLLMChatTest(ABC):
         )
         assert response is not None
 
+        # in the OpenAI format the JSON response is returned in the content, so it must not be None
+        assert response.choices[0].message.content is not None
+
     def test_message_with_name(self):
         base_completion_call_args = self.get_base_completion_call_args()
         messages = [
@@ -82,6 +85,49 @@
 
         print(response)
 
+        # OpenAI guarantees that the JSON response is returned in the content
+        # relevant issue: https://github.com/BerriAI/litellm/issues/6741
+        assert response.choices[0].message.content is not None
+
+    def test_json_response_format_stream(self):
+        """
+        Test that the JSON response format is supported by the LLM API when streaming
+        """
+        base_completion_call_args = self.get_base_completion_call_args()
+        litellm.set_verbose = True
+
+        messages = [
+            {
+                "role": "system",
+                "content": "Your output should be a JSON object with no additional properties. ",
+            },
+            {
+                "role": "user",
+                "content": "Respond with this in json. city=San Francisco, state=CA, weather=sunny, temp=60",
+            },
+        ]
+
+        response = litellm.completion(
+            **base_completion_call_args,
+            messages=messages,
+            response_format={"type": "json_object"},
+            stream=True,
+        )
+
+        print(response)
+
+        content = ""
+        for chunk in response:
+            content += chunk.choices[0].delta.content or ""
+
+        print("content=", content)
+
+        # OpenAI guarantees that the JSON response is returned in the content
+        # relevant issue: https://github.com/BerriAI/litellm/issues/6741
+        # assert that the JSON response was returned in the content (for Anthropic it was previously returned as part of the tool call)
+        assert content is not None
+        assert len(content) > 0
+
     @pytest.fixture
     def pdf_messages(self):
         import base64
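# ---------------------------------------------------------------------------
# Hypothetical helper (an assumption, not in the patch) showing how the
# streaming test above could be tightened: beyond being non-empty, the
# aggregated content should parse as a JSON object.
# ---------------------------------------------------------------------------
import json

def assert_valid_json_object(content: str) -> dict:
    parsed = json.loads(content)  # raises JSONDecodeError if content is not JSON
    assert isinstance(parsed, dict), f"expected a JSON object, got {type(parsed)}"
    return parsed

# usage with an invented aggregated stream result
assert_valid_json_object('{"city": "San Francisco", "state": "CA", "temp": 60}')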
diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py
index c399c3a47..8a788e0fb 100644
--- a/tests/llm_translation/test_anthropic_completion.py
+++ b/tests/llm_translation/test_anthropic_completion.py
@@ -33,8 +33,10 @@ from litellm import (
 )
 from litellm.adapters.anthropic_adapter import anthropic_adapter
 from litellm.types.llms.anthropic import AnthropicResponse
-
+from litellm.types.utils import GenericStreamingChunk, ChatCompletionToolCallChunk
+from litellm.types.llms.openai import ChatCompletionToolCallFunctionChunk
 from litellm.llms.anthropic.common_utils import process_anthropic_headers
+from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion
 from httpx import Headers
 from base_llm_unit_tests import BaseLLMChatTest
 
@@ -694,3 +696,91 @@ class TestAnthropicCompletion(BaseLLMChatTest):
         assert _document_validation["type"] == "document"
         assert _document_validation["source"]["media_type"] == "application/pdf"
         assert _document_validation["source"]["type"] == "base64"
+
+
+def test_convert_tool_response_to_message_with_values():
+    """Test converting a tool response with a 'values' key to a message"""
+    tool_calls = [
+        ChatCompletionToolCallChunk(
+            id="test_id",
+            type="function",
+            function=ChatCompletionToolCallFunctionChunk(
+                name="json_tool_call",
+                arguments='{"values": {"name": "John", "age": 30}}',
+            ),
+            index=0,
+        )
+    ]
+
+    message = AnthropicChatCompletion._convert_tool_response_to_message(
+        tool_calls=tool_calls
+    )
+
+    assert message is not None
+    assert message.content == '{"name": "John", "age": 30}'
+
+
+def test_convert_tool_response_to_message_without_values():
+    """
+    Test converting a tool response without a 'values' key to a message
+
+    The Anthropic API returns the JSON response in the tool call; the OpenAI spec expects it in the message content. This test ensures that the tool call is converted to a message correctly.
+
+    Relevant issue: https://github.com/BerriAI/litellm/issues/6741
+    """
+    tool_calls = [
+        ChatCompletionToolCallChunk(
+            id="test_id",
+            type="function",
+            function=ChatCompletionToolCallFunctionChunk(
+                name="json_tool_call", arguments='{"name": "John", "age": 30}'
+            ),
+            index=0,
+        )
+    ]
+
+    message = AnthropicChatCompletion._convert_tool_response_to_message(
+        tool_calls=tool_calls
+    )
+
+    assert message is not None
+    assert message.content == '{"name": "John", "age": 30}'
+
+
+def test_convert_tool_response_to_message_invalid_json():
+    """Test converting a tool response with invalid JSON"""
+    tool_calls = [
+        ChatCompletionToolCallChunk(
+            id="test_id",
+            type="function",
+            function=ChatCompletionToolCallFunctionChunk(
+                name="json_tool_call", arguments="invalid json"
+            ),
+            index=0,
+        )
+    ]
+
+    message = AnthropicChatCompletion._convert_tool_response_to_message(
+        tool_calls=tool_calls
+    )
+
+    assert message is not None
+    assert message.content == "invalid json"
+
+
+def test_convert_tool_response_to_message_no_arguments():
+    """Test converting a tool response with no arguments"""
+    tool_calls = [
+        ChatCompletionToolCallChunk(
+            id="test_id",
+            type="function",
+            function=ChatCompletionToolCallFunctionChunk(name="json_tool_call"),
+            index=0,
+        )
+    ]
+
+    message = AnthropicChatCompletion._convert_tool_response_to_message(
+        tool_calls=tool_calls
+    )
+
+    assert message is None
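# ---------------------------------------------------------------------------
# Recap sketch (not part of the patch) of the three conversion rules the
# tests above exercise, driven through the real helper; the argument strings
# are invented examples.
# ---------------------------------------------------------------------------
from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion
from litellm.types.llms.openai import ChatCompletionToolCallFunctionChunk
from litellm.types.utils import ChatCompletionToolCallChunk

def _convert(arguments: str):
    tool_call = ChatCompletionToolCallChunk(
        id="toolu_123",  # hypothetical tool call id
        type="function",
        function=ChatCompletionToolCallFunctionChunk(
            name="json_tool_call", arguments=arguments
        ),
        index=0,
    )
    return AnthropicChatCompletion._convert_tool_response_to_message(
        tool_calls=[tool_call]
    )

# a 'values' wrapper is unwrapped into the content
assert _convert('{"values": {"a": 1}}').content == '{"a": 1}'
# without 'values', the arguments pass through as-is (issue #6741)
assert _convert('{"a": 1}').content == '{"a": 1}'
# invalid JSON is preserved verbatim rather than dropped
assert _convert("not json").content == "not json"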