From e93f84b347df6d16d53353c175e54c376d2123ea Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 26 Mar 2025 16:53:33 -0700
Subject: [PATCH 01/15] add type hints for AnthropicMessagesResponse

---
 .../anthropic_messages/anthropic_response.py | 85 +++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 litellm/types/llms/anthropic_messages/anthropic_response.py

diff --git a/litellm/types/llms/anthropic_messages/anthropic_response.py b/litellm/types/llms/anthropic_messages/anthropic_response.py
new file mode 100644
index 0000000000..e260220776
--- /dev/null
+++ b/litellm/types/llms/anthropic_messages/anthropic_response.py
@@ -0,0 +1,85 @@
+from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
+
+import anthropic
+from pydantic import PropertyInfo
+from typing_extensions import Annotated, TypeAlias
+
+
+class AnthropicResponseTextBlock(TypedDict, total=False):
+    """
+    Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
+    """
+
+    citations: Optional[List[Dict[str, Any]]]
+    text: str
+    type: Literal["text"]
+
+
+class AnthropicResponseToolUseBlock(TypedDict, total=False):
+    """
+    Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
+    """
+
+    id: Optional[str]
+    input: Optional[str]
+    name: Optional[str]
+    type: Literal["tool_use"]
+
+
+class AnthropicResponseThinkingBlock(TypedDict, total=False):
+    """
+    Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
+    """
+
+    signature: Optional[str]
+    thinking: Optional[str]
+    type: Literal["thinking"]
+
+
+class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
+    """
+    Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
+    """
+
+    data: Optional[str]
+    type: Literal["redacted_thinking"]
+
+
+AnthropicResponseContentBlock: TypeAlias = Union[
+    AnthropicResponseTextBlock,
+    AnthropicResponseToolUseBlock,
+    AnthropicResponseThinkingBlock,
+    AnthropicResponseRedactedThinkingBlock,
+]
+
+
+class AnthropicUsage(TypedDict, total=False):
+    """
+    Input and output tokens used in the request
+    """
+
+    input_tokens: int
+    output_tokens: int
+
+    """
+    Cache Tokens Used
+    """
+    cache_creation_input_tokens: int
+    cache_read_input_tokens: int
+
+
+class AnthropicMessagesResponse(TypedDict, total=False):
+    """
+    Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
+    """
+
+    content: Optional[List[AnthropicResponseContentBlock]]
+    id: str
+    model: Optional[str]  # This represents the Model type from Anthropic
+    role: Optional[Literal["assistant"]]
+    stop_reason: Optional[
+        Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
+    ]
+    stop_sequence: Optional[str]
+    type: Optional[Literal["message"]]
+    usage: Optional[AnthropicUsage]

From 957b7eb82c803d79fe9c914fba70e4af3530fc73 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 26 Mar 2025 16:54:45 -0700
Subject: [PATCH 02/15] define types for response from AnthropicMessagesResponse

---
 .../anthropic/experimental_pass_through/messages/handler.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
index a7dfff74d9..9b890db266 100644
--- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
+++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
@@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
    get_async_httpx_client,
 )
+from litellm.types.llms.anthropic_messages.anthropic_response import (
+    AnthropicMessagesResponse,
+)
 from litellm.types.router import GenericLiteLLMParams
 from litellm.types.utils import ProviderSpecificHeader
 from litellm.utils import ProviderConfigManager, client
@@ -68,7 +71,7 @@ async def anthropic_messages(
     client: Optional[AsyncHTTPHandler] = None,
     custom_llm_provider: Optional[str] = None,
     **kwargs,
-) -> Union[Dict[str, Any], AsyncIterator]:
+) -> Union[AnthropicMessagesResponse, AsyncIterator]:
     """
     Makes Anthropic `/v1/messages` API calls in the Anthropic API spec
     """

From 968ef4a299dff3a7a476a4f24ba40a6fdb2df9fa Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 26 Mar 2025 16:56:56 -0700
Subject: [PATCH 03/15] fix response typing

---
 litellm/types/llms/anthropic_messages/anthropic_response.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/litellm/types/llms/anthropic_messages/anthropic_response.py b/litellm/types/llms/anthropic_messages/anthropic_response.py
index e260220776..270807fc8f 100644
--- a/litellm/types/llms/anthropic_messages/anthropic_response.py
+++ b/litellm/types/llms/anthropic_messages/anthropic_response.py
@@ -1,8 +1,6 @@
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
 
-import anthropic
-from pydantic import PropertyInfo
-from typing_extensions import Annotated, TypeAlias
+from typing_extensions import TypeAlias
 
 
 class AnthropicResponseTextBlock(TypedDict, total=False):

From 07dce8bed324fd0d60c088d80b2362452b74fb34 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 26 Mar 2025 17:05:38 -0700
Subject: [PATCH 04/15] allow using litellm.messages.acreate and litellm.messages.create

---
 litellm/__init__.py          |  1 +
 litellm/messages/__init__.py | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 litellm/messages/__init__.py

diff --git a/litellm/__init__.py b/litellm/__init__.py
index a59484b035..0080e9551c 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -26,6 +26,7 @@ from litellm._logging import (
     log_level,
 )
 import re
+from .messages import *
 from litellm.constants import (
     DEFAULT_BATCH_SIZE,
     DEFAULT_FLUSH_INTERVAL_SECONDS,

diff --git a/litellm/messages/__init__.py b/litellm/messages/__init__.py
new file mode 100644
index 0000000000..1274f768ba
--- /dev/null
+++ b/litellm/messages/__init__.py
@@ -0,0 +1,16 @@
+"""
+Interface for Anthropic's messages API
+
+Use this to call LLMs in Anthropic /messages Request/Response format
+"""
+
+from litellm.llms.anthropic.experimental_pass_through.handler import (
+    anthropic_messages as _async_anthropic_messages,
+)
+
+
+async def acreate(*args, **kwargs):
+    """
+    Wrapper around Anthropic's messages API
+    """
+    return await _async_anthropic_messages(*args, **kwargs)

From 3640262dbfacccefd3f8b70ac2677a18b4e3a345 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 26 Mar 2025 17:12:40 -0700
Subject: [PATCH 05/15] fix anthropic_messages implementation

---
 .../messages/handler.py      | 21 +++-
 litellm/messages/__init__.py | 98 ++++++++++++++++++-
 2 files changed, 111 insertions(+), 8 deletions(-)

diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
index 9b890db266..54826a38ba 100644
--- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
+++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
@@ -6,7 +6,7 @@
 """
 
 import json
-from typing import Any, AsyncIterator, Dict, Optional,
Union, cast +from typing import AsyncIterator, Dict, List, Optional, Union, cast import httpx @@ -64,9 +64,20 @@ class AnthropicMessagesHandler: @client async def anthropic_messages( - api_key: str, + max_tokens: int, + messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], model: str, - stream: bool = False, + metadata: Optional[Dict] = None, + stop_sequences: Optional[List[str]] = None, + stream: Optional[bool] = False, + system: Optional[str] = None, + temperature: Optional[float] = 1.0, + thinking: Optional[Dict] = None, + tool_choice: Optional[Dict] = None, + tools: Optional[List[Dict]] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + api_key: Optional[str] = None, api_base: Optional[str] = None, client: Optional[AsyncHTTPHandler] = None, custom_llm_provider: Optional[str] = None, @@ -133,7 +144,7 @@ async def anthropic_messages( litellm_logging_obj.model_call_details.update(kwargs) # Prepare request body - request_body = kwargs.copy() + request_body = locals().copy() request_body = { k: v for k, v in request_body.items() @@ -165,7 +176,7 @@ async def anthropic_messages( url=request_url, headers=headers, data=json.dumps(request_body), - stream=stream, + stream=stream or False, ) response.raise_for_status() diff --git a/litellm/messages/__init__.py b/litellm/messages/__init__.py index 1274f768ba..0a4ea92c2c 100644 --- a/litellm/messages/__init__.py +++ b/litellm/messages/__init__.py @@ -4,13 +4,105 @@ Interface for Anthropic's messages API Use this to call LLMs in Anthropic /messages Request/Response format """ +from typing import Dict, List, Optional, Union + from litellm.llms.anthropic.experimental_pass_through.handler import ( anthropic_messages as _async_anthropic_messages, ) -async def acreate(*args, **kwargs): +async def acreate( + max_tokens: int, + messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], + model: str, + metadata: Optional[Dict] = None, + stop_sequences: Optional[List[str]] = None, + stream: Optional[bool] = False, + system: Optional[str] = None, + temperature: Optional[float] = 1.0, + thinking: Optional[Dict] = None, + tool_choice: Optional[Dict] = None, + tools: Optional[List[Dict]] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs +) -> Dict: """ - Wrapper around Anthropic's messages API + Async wrapper for Anthropic's messages API + + Args: + max_tokens (int): Maximum tokens to generate (required) + messages (List[Dict]): List of message objects with role and content (required) + model (str): Model name to use (required) + metadata (Dict, optional): Request metadata + stop_sequences (List[str], optional): Custom stop sequences + stream (bool, optional): Whether to stream the response + system (str, optional): System prompt + temperature (float, optional): Sampling temperature (0.0 to 1.0) + thinking (Dict, optional): Extended thinking configuration + tool_choice (Dict, optional): Tool choice configuration + tools (List[Dict], optional): List of tool definitions + top_k (int, optional): Top K sampling parameter + top_p (float, optional): Nucleus sampling parameter + **kwargs: Additional arguments + + Returns: + Dict: Response from the API """ - return await _async_anthropic_messages(*args, **kwargs) + return await _async_anthropic_messages( + max_tokens=max_tokens, + messages=messages, + model=model, + metadata=metadata, + stop_sequences=stop_sequences, + stream=stream, + system=system, + temperature=temperature, + thinking=thinking, + tool_choice=tool_choice, + tools=tools, + top_k=top_k, + 
top_p=top_p, + **kwargs, + ) + + +async def create( + max_tokens: int, + messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], + model: str, + metadata: Optional[Dict] = None, + stop_sequences: Optional[List[str]] = None, + stream: Optional[bool] = False, + system: Optional[str] = None, + temperature: Optional[float] = 1.0, + thinking: Optional[Dict] = None, + tool_choice: Optional[Dict] = None, + tools: Optional[List[Dict]] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs +) -> Dict: + """ + Async wrapper for Anthropic's messages API + + Args: + max_tokens (int): Maximum tokens to generate (required) + messages (List[Dict]): List of message objects with role and content (required) + model (str): Model name to use (required) + metadata (Dict, optional): Request metadata + stop_sequences (List[str], optional): Custom stop sequences + stream (bool, optional): Whether to stream the response + system (str, optional): System prompt + temperature (float, optional): Sampling temperature (0.0 to 1.0) + thinking (Dict, optional): Extended thinking configuration + tool_choice (Dict, optional): Tool choice configuration + tools (List[Dict], optional): List of tool definitions + top_k (int, optional): Top K sampling parameter + top_p (float, optional): Nucleus sampling parameter + **kwargs: Additional arguments + + Returns: + Dict: Response from the API + """ + raise NotImplementedError("This function is not implemented") From 1b085a306a6853a3e5aebc78800771dce2e3b2a3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Mar 2025 17:17:56 -0700 Subject: [PATCH 06/15] add clear type hints to litellm.messages.create functions --- litellm/messages/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/litellm/messages/__init__.py b/litellm/messages/__init__.py index 0a4ea92c2c..b2be67bbd9 100644 --- a/litellm/messages/__init__.py +++ b/litellm/messages/__init__.py @@ -9,6 +9,9 @@ from typing import Dict, List, Optional, Union from litellm.llms.anthropic.experimental_pass_through.handler import ( anthropic_messages as _async_anthropic_messages, ) +from litellm.types.llms.anthropic_messages.anthropic_response import ( + AnthropicMessagesResponse, +) async def acreate( @@ -26,7 +29,7 @@ async def acreate( top_k: Optional[int] = None, top_p: Optional[float] = None, **kwargs -) -> Dict: +) -> AnthropicMessagesResponse: """ Async wrapper for Anthropic's messages API @@ -82,7 +85,7 @@ async def create( top_k: Optional[int] = None, top_p: Optional[float] = None, **kwargs -) -> Dict: +) -> AnthropicMessagesResponse: """ Async wrapper for Anthropic's messages API From 8dcdff92803c916dd6e9fb3638cfad24cd2bde64 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Mar 2025 17:21:14 -0700 Subject: [PATCH 07/15] fix anthropic_messages --- .../anthropic/experimental_pass_through/messages/handler.py | 2 +- litellm/messages/__init__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py index 54826a38ba..0b809a4460 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py @@ -65,7 +65,7 @@ class AnthropicMessagesHandler: @client async def anthropic_messages( max_tokens: int, - messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], + messages: List[Dict], model: str, metadata: 
Optional[Dict] = None, stop_sequences: Optional[List[str]] = None, diff --git a/litellm/messages/__init__.py b/litellm/messages/__init__.py index b2be67bbd9..c5cb3f5330 100644 --- a/litellm/messages/__init__.py +++ b/litellm/messages/__init__.py @@ -16,7 +16,7 @@ from litellm.types.llms.anthropic_messages.anthropic_response import ( async def acreate( max_tokens: int, - messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], + messages: List[Dict], model: str, metadata: Optional[Dict] = None, stop_sequences: Optional[List[str]] = None, @@ -72,7 +72,7 @@ async def acreate( async def create( max_tokens: int, - messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], + messages: List[Dict], model: str, metadata: Optional[Dict] = None, stop_sequences: Optional[List[str]] = None, From 9eb9a369bbfc301f4998e555fa3082c29aea4dd1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Mar 2025 17:34:41 -0700 Subject: [PATCH 08/15] working anthropic API tests --- litellm/__init__.py | 2 +- .../experimental_pass_through/messages/handler.py | 3 ++- litellm/messages/__init__.py | 14 ++++++++++---- .../test_anthropic_messages_passthrough.py | 12 +++++++----- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index 0080e9551c..4e1410a319 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -26,7 +26,6 @@ from litellm._logging import ( log_level, ) import re -from .messages import * from litellm.constants import ( DEFAULT_BATCH_SIZE, DEFAULT_FLUSH_INTERVAL_SECONDS, @@ -1028,6 +1027,7 @@ from .proxy.proxy_cli import run_server from .router import Router from .assistants.main import * from .batches.main import * +from .messages import * from .batch_completion.main import * # type: ignore from .rerank_api.main import * from .llms.anthropic.experimental_pass_through.messages.handler import * diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py index 0b809a4460..fd7a3d60a8 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py @@ -71,7 +71,7 @@ async def anthropic_messages( stop_sequences: Optional[List[str]] = None, stream: Optional[bool] = False, system: Optional[str] = None, - temperature: Optional[float] = 1.0, + temperature: Optional[float] = None, thinking: Optional[Dict] = None, tool_choice: Optional[Dict] = None, tools: Optional[List[Dict]] = None, @@ -152,6 +152,7 @@ async def anthropic_messages( in anthropic_messages_provider_config.get_supported_anthropic_messages_params( model=model ) + and v is not None } request_body["stream"] = stream request_body["model"] = model diff --git a/litellm/messages/__init__.py b/litellm/messages/__init__.py index c5cb3f5330..f3249f981b 100644 --- a/litellm/messages/__init__.py +++ b/litellm/messages/__init__.py @@ -2,11 +2,17 @@ Interface for Anthropic's messages API Use this to call LLMs in Anthropic /messages Request/Response format + +This is an __init__.py file to allow the following interface + +- litellm.messages.acreate +- litellm.messages.create + """ -from typing import Dict, List, Optional, Union +from typing import AsyncIterator, Dict, Iterator, List, Optional, Union -from litellm.llms.anthropic.experimental_pass_through.handler import ( +from litellm.llms.anthropic.experimental_pass_through.messages.handler import ( anthropic_messages as _async_anthropic_messages, ) from 
litellm.types.llms.anthropic_messages.anthropic_response import ( @@ -29,7 +35,7 @@ async def acreate( top_k: Optional[int] = None, top_p: Optional[float] = None, **kwargs -) -> AnthropicMessagesResponse: +) -> Union[AnthropicMessagesResponse, AsyncIterator]: """ Async wrapper for Anthropic's messages API @@ -85,7 +91,7 @@ async def create( top_k: Optional[int] = None, top_p: Optional[float] = None, **kwargs -) -> AnthropicMessagesResponse: +) -> Union[AnthropicMessagesResponse, Iterator]: """ Async wrapper for Anthropic's messages API diff --git a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py index b5b3302acc..cb7ac0d3bf 100644 --- a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py +++ b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py @@ -8,7 +8,7 @@ import unittest.mock from unittest.mock import AsyncMock, MagicMock sys.path.insert( - 0, os.path.abspath("../..") + 0, os.path.abspath("../../..") ) # Adds the parent directory to the system path import litellm import pytest @@ -16,6 +16,7 @@ from dotenv import load_dotenv from litellm.llms.anthropic.experimental_pass_through.messages.handler import ( anthropic_messages, ) + from typing import Optional from litellm.types.utils import StandardLoggingPayload from litellm.integrations.custom_logger import CustomLogger @@ -73,6 +74,7 @@ async def test_anthropic_messages_non_streaming(): """ Test the anthropic_messages with non-streaming request """ + litellm._turn_on_debug() # Get API key from environment api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: @@ -82,7 +84,7 @@ async def test_anthropic_messages_non_streaming(): messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}] # Call the handler - response = await anthropic_messages( + response = await litellm.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", @@ -114,7 +116,7 @@ async def test_anthropic_messages_streaming(): # Call the handler async_httpx_client = AsyncHTTPHandler() - response = await anthropic_messages( + response = await litellm.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", @@ -134,7 +136,7 @@ async def test_anthropic_messages_streaming_with_bad_request(): Test the anthropic_messages with streaming request """ try: - response = await anthropic_messages( + response = await litellm.messages.acreate( messages=["hi"], api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-haiku-20240307", @@ -458,7 +460,7 @@ async def test_anthropic_messages_with_extra_headers(): mock_client.post = AsyncMock(return_value=mock_response) # Call the handler with extra_headers and our mocked client - response = await anthropic_messages( + response = await litellm.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", From 8499a88e4a45dcd887c027b143dbea892a2480dd Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Mar 2025 17:45:47 -0700 Subject: [PATCH 09/15] fixes - anthropic messages interface --- .../anthropic/experimental_pass_through/messages/handler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py index fd7a3d60a8..8b4c947a7c 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py +++ 
b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py @@ -141,8 +141,6 @@ async def anthropic_messages( }, custom_llm_provider=_custom_llm_provider, ) - litellm_logging_obj.model_call_details.update(kwargs) - # Prepare request body request_body = locals().copy() request_body = { @@ -157,6 +155,7 @@ async def anthropic_messages( request_body["stream"] = stream request_body["model"] = model litellm_logging_obj.stream = stream + litellm_logging_obj.model_call_details.update(request_body) # Make the request request_url = anthropic_messages_provider_config.get_complete_url( From bd39a395f1eb33f0398bb74aa2e3f263908c65e2 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 14:31:09 -0700 Subject: [PATCH 10/15] use new anthropic interface --- litellm/__init__.py | 2 +- litellm/anthropic/__init__.py | 4 ++++ litellm/{ => anthropic}/messages/__init__.py | 0 .../test_anthropic_messages_passthrough.py | 8 ++++---- 4 files changed, 9 insertions(+), 5 deletions(-) create mode 100644 litellm/anthropic/__init__.py rename litellm/{ => anthropic}/messages/__init__.py (100%) diff --git a/litellm/__init__.py b/litellm/__init__.py index a62322cef9..4db5bff11b 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1026,7 +1026,7 @@ from .proxy.proxy_cli import run_server from .router import Router from .assistants.main import * from .batches.main import * -from .messages import * +from .anthropic import * from .batch_completion.main import * # type: ignore from .rerank_api.main import * from .llms.anthropic.experimental_pass_through.messages.handler import * diff --git a/litellm/anthropic/__init__.py b/litellm/anthropic/__init__.py new file mode 100644 index 0000000000..c56a530c3c --- /dev/null +++ b/litellm/anthropic/__init__.py @@ -0,0 +1,4 @@ +""" +Anthropic module for LiteLLM +""" +from .messages import acreate, create diff --git a/litellm/messages/__init__.py b/litellm/anthropic/messages/__init__.py similarity index 100% rename from litellm/messages/__init__.py rename to litellm/anthropic/messages/__init__.py diff --git a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py index cb7ac0d3bf..ec268b1a24 100644 --- a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py +++ b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py @@ -84,7 +84,7 @@ async def test_anthropic_messages_non_streaming(): messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}] # Call the handler - response = await litellm.messages.acreate( + response = await litellm.anthropic.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", @@ -116,7 +116,7 @@ async def test_anthropic_messages_streaming(): # Call the handler async_httpx_client = AsyncHTTPHandler() - response = await litellm.messages.acreate( + response = await litellm.anthropic.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", @@ -136,7 +136,7 @@ async def test_anthropic_messages_streaming_with_bad_request(): Test the anthropic_messages with streaming request """ try: - response = await litellm.messages.acreate( + response = await litellm.anthropic.messages.acreate( messages=["hi"], api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-haiku-20240307", @@ -460,7 +460,7 @@ async def test_anthropic_messages_with_extra_headers(): mock_client.post = AsyncMock(return_value=mock_response) # Call the handler with extra_headers and our 
mocked client - response = await litellm.messages.acreate( + response = await litellm.anthropic.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", From cfab8eec5b0dfdd464040467f177b4947823352e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 14:44:12 -0700 Subject: [PATCH 11/15] fix code quality check --- litellm/anthropic/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/anthropic/__init__.py b/litellm/anthropic/__init__.py index c56a530c3c..9902fdc553 100644 --- a/litellm/anthropic/__init__.py +++ b/litellm/anthropic/__init__.py @@ -2,3 +2,5 @@ Anthropic module for LiteLLM """ from .messages import acreate, create + +__all__ = ["acreate", "create"] From b8c0526b98bd21d4d810acdc322c351e7f12ea75 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 15:28:40 -0700 Subject: [PATCH 12/15] docs anthropic messages endpoint --- docs/my-website/docs/anthropic_unified.md | 271 +++++++++++++++++++--- 1 file changed, 240 insertions(+), 31 deletions(-) diff --git a/docs/my-website/docs/anthropic_unified.md b/docs/my-website/docs/anthropic_unified.md index cf6ba798d5..485571aa28 100644 --- a/docs/my-website/docs/anthropic_unified.md +++ b/docs/my-website/docs/anthropic_unified.md @@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem'; # /v1/messages [BETA] -LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint. +Use LiteLLM to call all your LLM APIs in the Anthropic `v1/messages` format. -This currently just supports the Anthropic API. + +## Overview | Feature | Supported | Notes | |-------|-------|-------| @@ -21,9 +22,61 @@ Planned improvement: - Bedrock Anthropic support ## Usage +--- + +### LiteLLM Python SDK + +#### Non-streaming example +```python showLineNumbers title="Example using LiteLLM Python SDK" +import litellm +response = await litellm.anthropic.messages.acreate( + messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}], + api_key=api_key, + model="anthropic/claude-3-haiku-20240307", + max_tokens=100, +) +``` + +Example response: +```json +{ + "content": [ + { + "text": "Hi! this is a very short joke", + "type": "text" + } + ], + "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF", + "model": "claude-3-7-sonnet-20250219", + "role": "assistant", + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "input_tokens": 2095, + "output_tokens": 503, + "cache_creation_input_tokens": 2095, + "cache_read_input_tokens": 0 + } +} +``` + +#### Streaming example +```python showLineNumbers title="Example using LiteLLM Python SDK" +import litellm +response = await litellm.anthropic.messages.acreate( + messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}], + api_key=api_key, + model="anthropic/claude-3-haiku-20240307", + max_tokens=100, + stream=True, +) +async for chunk in response: + print(chunk) +``` + +### LiteLLM Proxy Server - - 1. Setup config.yaml @@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml 3. Test it! 
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic

# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

response = client.messages.create(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    model="anthropic-claude",
    max_tokens=100,
)
```

```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
-H 'anthropic-version: 2023-06-01' \
-d '{
  "model": "anthropic-claude",
  "messages": [
    {
      "role": "user",
-      "content": [
-        {
-          "type": "text",
-          "text": "List 5 important events in the XIX century"
-        }
-      ]
+      "content": "Hello, can you tell me a short joke?"
    }
  ],
-  "max_tokens": 4096
+  "max_tokens": 100
}'
```

-```python
-from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages
-import asyncio
-import os
-# set env
-os.environ["ANTHROPIC_API_KEY"] = "my-api-key"
-
-messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]
-
-# Call the handler
-async def call():
-    response = await anthropic_messages(
-        messages=messages,
-        api_key=api_key,
-        model="claude-3-haiku-20240307",
-        max_tokens=100,
-    )
-
-asyncio.run(call())
-```

## Request Format
---

Request body will be in the Anthropic messages API format. **litellm follows the Anthropic messages specification for this endpoint.**

#### Example request body

```json
{
  "model": "claude-3-7-sonnet-20250219",
  "max_tokens": 1024,
  "messages": [
    {
      "role": "user",
      "content": "Hello, world"
    }
  ]
}
```

#### Required Fields
- **model** (string):
  The model identifier (e.g., `"claude-3-7-sonnet-20250219"`).
- **max_tokens** (integer):
  The maximum number of tokens to generate before stopping.
  _Note: The model may stop before reaching this limit; value must be greater than 1._
- **messages** (array of objects):
  An ordered list of conversational turns.
  Each message object must include:
  - **role** (enum: `"user"` or `"assistant"`):
    Specifies the speaker of the message.
  - **content** (string or array of content blocks):
    The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message.
    _Example equivalence:_
    ```json
    {"role": "user", "content": "Hello, Claude"}
    ```
    is equivalent to:
    ```json
    {"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]}
    ```

#### Optional Fields
- **metadata** (object):
  Contains additional metadata about the request (e.g., `user_id` as an opaque identifier).
- **stop_sequences** (array of strings):
  Custom sequences that, when encountered in the generated text, cause the model to stop.
- **stream** (boolean):
  Indicates whether to stream the response using server-sent events.
- **system** (string or array):
  A system prompt providing context or specific instructions to the model.
- **temperature** (number):
  Controls randomness in the model’s responses. Valid range: `0 < temperature < 1`.
- **thinking** (object):
  Configuration for enabling extended thinking. If enabled, it includes:
  - **budget_tokens** (integer):
    Minimum of 1024 tokens (and less than `max_tokens`).
  - **type** (enum):
    E.g., `"enabled"`.
- **tool_choice** (object):
  Instructs how the model should utilize any provided tools.
+- **tools** (array of objects): + Definitions for tools available to the model. Each tool includes: + - **name** (string): + The tool’s name. + - **description** (string): + A detailed description of the tool. + - **input_schema** (object): + A JSON schema describing the expected input format for the tool. +- **top_k** (integer): + Limits sampling to the top K options. +- **top_p** (number): + Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`. + + +## Response Format +--- + +Responses will be in the Anthropic messages API format. + +#### Example Response + +```json +{ + "content": [ + { + "text": "Hi! My name is Claude.", + "type": "text" + } + ], + "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF", + "model": "claude-3-7-sonnet-20250219", + "role": "assistant", + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "input_tokens": 2095, + "output_tokens": 503, + "cache_creation_input_tokens": 2095, + "cache_read_input_tokens": 0 + } +} +``` + +#### Response fields + +- **content** (array of objects): + Contains the generated content blocks from the model. Each block includes: + - **type** (string): + Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`). + - **text** (string): + The generated text from the model. + _Note: Maximum length is 5,000,000 characters._ + - **citations** (array of objects or `null`): + Optional field providing citation details. Each citation includes: + - **cited_text** (string): + The excerpt being cited. + - **document_index** (integer): + An index referencing the cited document. + - **document_title** (string or `null`): + The title of the cited document. + - **start_char_index** (integer): + The starting character index for the citation. + - **end_char_index** (integer): + The ending character index for the citation. + - **type** (string): + Typically `"char_location"`. + +- **id** (string): + A unique identifier for the response message. + _Note: The format and length of IDs may change over time._ + +- **model** (string): + Specifies the model that generated the response. + +- **role** (string): + Indicates the role of the generated message. For responses, this is always `"assistant"`. + +- **stop_reason** (string): + Explains why the model stopped generating text. Possible values include: + - `"end_turn"`: The model reached a natural stopping point. + - `"max_tokens"`: The generation stopped because the maximum token limit was reached. + - `"stop_sequence"`: A custom stop sequence was encountered. + - `"tool_use"`: The model invoked one or more tools. + +- **stop_sequence** (string or `null`): + Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`. + +- **type** (string): + Denotes the type of response object, which is always `"message"`. + +- **usage** (object): + Provides details on token usage for billing and rate limiting. This includes: + - **input_tokens** (integer): + Total number of input tokens processed. + - **output_tokens** (integer): + Total number of output tokens generated. + - **cache_creation_input_tokens** (integer or `null`): + Number of tokens used to create a cache entry. + - **cache_read_input_tokens** (integer or `null`): + Number of tokens read from the cache. 
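Taken together, patches 01-12 wire a typed return through the new interface: `litellm.anthropic.messages.acreate` returns `Union[AnthropicMessagesResponse, AsyncIterator]`, and every key on the TypedDict is optional (`total=False`). Below is a minimal sketch of consuming the non-streaming arm. It is not part of the diff; it assumes a valid `ANTHROPIC_API_KEY` in the environment, and the `cast` is purely illustrative, since with the default `stream=False` only the TypedDict arm is expected.

```python
import asyncio
import os
from typing import cast

import litellm
from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)


async def main() -> None:
    # With stream=False (the default) only the TypedDict arm of the union is
    # expected, so a cast narrows the return type for mypy.
    response = cast(
        AnthropicMessagesResponse,
        await litellm.anthropic.messages.acreate(
            messages=[{"role": "user", "content": "Tell me a short joke."}],
            api_key=os.environ["ANTHROPIC_API_KEY"],  # assumed to be set
            model="claude-3-haiku-20240307",
            max_tokens=100,
        ),
    )

    # total=False means no key is guaranteed, so .get() with a fallback is
    # the safe accessor throughout.
    for block in response.get("content") or []:
        if block.get("type") == "text":
            print(block.get("text"))

    usage = response.get("usage") or {}
    print("input tokens:", usage.get("input_tokens"))


asyncio.run(main())
```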
From 43db2011cc460c76c5ade138caf24cf5b1935f8b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 15:36:15 -0700 Subject: [PATCH 13/15] add namespace_packages = True to mypy --- mypy.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/mypy.ini b/mypy.ini index 19ead3ba7d..3ce8c5fcc0 100644 --- a/mypy.ini +++ b/mypy.ini @@ -2,6 +2,7 @@ warn_return_any = False ignore_missing_imports = True mypy_path = litellm/stubs +namespace_packages = True [mypy-google.*] ignore_missing_imports = True From ceab3cfa7ce6e9856fbf0b02dbfa4aefb2b90530 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 16:52:39 -0700 Subject: [PATCH 14/15] fix mypy lint errors --- litellm/__init__.py | 2 +- litellm/{anthropic => anthropic_interface}/__init__.py | 0 .../messages/__init__.py | 0 litellm/anthropic_interface/readme.md | 9 +++++++++ 4 files changed, 10 insertions(+), 1 deletion(-) rename litellm/{anthropic => anthropic_interface}/__init__.py (100%) rename litellm/{anthropic => anthropic_interface}/messages/__init__.py (100%) create mode 100644 litellm/anthropic_interface/readme.md diff --git a/litellm/__init__.py b/litellm/__init__.py index 4db5bff11b..9997b9a8ac 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1026,7 +1026,6 @@ from .proxy.proxy_cli import run_server from .router import Router from .assistants.main import * from .batches.main import * -from .anthropic import * from .batch_completion.main import * # type: ignore from .rerank_api.main import * from .llms.anthropic.experimental_pass_through.messages.handler import * @@ -1039,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token ### ADAPTERS ### from .types.adapter import AdapterItem +import litellm.anthropic_interface as anthropic adapters: List[AdapterItem] = [] diff --git a/litellm/anthropic/__init__.py b/litellm/anthropic_interface/__init__.py similarity index 100% rename from litellm/anthropic/__init__.py rename to litellm/anthropic_interface/__init__.py diff --git a/litellm/anthropic/messages/__init__.py b/litellm/anthropic_interface/messages/__init__.py similarity index 100% rename from litellm/anthropic/messages/__init__.py rename to litellm/anthropic_interface/messages/__init__.py diff --git a/litellm/anthropic_interface/readme.md b/litellm/anthropic_interface/readme.md new file mode 100644 index 0000000000..69099dd634 --- /dev/null +++ b/litellm/anthropic_interface/readme.md @@ -0,0 +1,9 @@ +## Use LLM API endpoints in Anthropic Interface + +Note: This is called `anthropic_interface` because `anthropic` is a known python package and was failing mypy type checking. + +### Usage + +```python +import litellm.anthropic_interface as anthropic +``` \ No newline at end of file From d91bf3cc91c388de4a72dfd448b937c3b44c613d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 16:53:39 -0700 Subject: [PATCH 15/15] docs anthropic messages interface --- litellm/anthropic_interface/readme.md | 113 +++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 3 deletions(-) diff --git a/litellm/anthropic_interface/readme.md b/litellm/anthropic_interface/readme.md index 69099dd634..01c5f1b7c3 100644 --- a/litellm/anthropic_interface/readme.md +++ b/litellm/anthropic_interface/readme.md @@ -2,8 +2,115 @@ Note: This is called `anthropic_interface` because `anthropic` is a known python package and was failing mypy type checking. 
## Usage
---

### LiteLLM Python SDK

#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```

Example response:
```json
{
  "content": [
    {
      "text": "Hi! this is a very short joke",
      "type": "text"
    }
  ],
  "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
  "model": "claude-3-7-sonnet-20250219",
  "role": "assistant",
  "stop_reason": "end_turn",
  "stop_sequence": null,
  "type": "message",
  "usage": {
    "input_tokens": 2095,
    "output_tokens": 503,
    "cache_creation_input_tokens": 2095,
    "cache_read_input_tokens": 0
  }
}
```

#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
    stream=True,
)
async for chunk in response:
    print(chunk)
```

### LiteLLM Proxy Server

1. Setup config.yaml

```yaml
model_list:
  - model_name: anthropic-claude
    litellm_params:
      model: claude-3-7-sonnet-latest
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic

# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

response = client.messages.create(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    model="anthropic-claude",
    max_tokens=100,
)
```

```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
-H 'anthropic-version: 2023-06-01' \
-d '{
  "model": "anthropic-claude",
  "messages": [
    {
      "role": "user",
      "content": "Hello, can you tell me a short joke?"
    }
  ],
  "max_tokens": 100
}'
```
\ No newline at end of file
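The streaming arm of that same union stays untyped: with `stream=True` the call resolves to a bare `AsyncIterator`, so chunk contents are whatever the provider streams back. The sketch below (again, not part of the diff, and under the same `ANTHROPIC_API_KEY` assumption) guards the union at the call site.

```python
import asyncio
import os
from collections.abc import AsyncIterator

import litellm


async def main() -> None:
    response = await litellm.anthropic.messages.acreate(
        messages=[{"role": "user", "content": "Tell me a short joke."}],
        api_key=os.environ["ANTHROPIC_API_KEY"],  # assumed to be set
        model="claude-3-haiku-20240307",
        max_tokens=100,
        stream=True,
    )

    # isinstance narrows the Union[AnthropicMessagesResponse, AsyncIterator]
    # return and guards against a non-streaming payload sneaking through.
    if isinstance(response, AsyncIterator):
        async for chunk in response:
            print(chunk)  # raw chunk; its shape is not typed by this series


asyncio.run(main())
```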