diff --git a/docs/my-website/docs/anthropic_unified.md b/docs/my-website/docs/anthropic_unified.md index cf6ba798d5..485571aa28 100644 --- a/docs/my-website/docs/anthropic_unified.md +++ b/docs/my-website/docs/anthropic_unified.md @@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem'; # /v1/messages [BETA] -LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint. +Use LiteLLM to call all your LLM APIs in the Anthropic `v1/messages` format. -This currently just supports the Anthropic API. + +## Overview | Feature | Supported | Notes | |-------|-------|-------| @@ -21,9 +22,61 @@ Planned improvement: - Bedrock Anthropic support ## Usage +--- + +### LiteLLM Python SDK + +#### Non-streaming example +```python showLineNumbers title="Example using LiteLLM Python SDK" +import litellm +response = await litellm.anthropic.messages.acreate( + messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}], + api_key=api_key, + model="anthropic/claude-3-haiku-20240307", + max_tokens=100, +) +``` + +Example response: +```json +{ + "content": [ + { + "text": "Hi! this is a very short joke", + "type": "text" + } + ], + "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF", + "model": "claude-3-7-sonnet-20250219", + "role": "assistant", + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "input_tokens": 2095, + "output_tokens": 503, + "cache_creation_input_tokens": 2095, + "cache_read_input_tokens": 0 + } +} +``` + +#### Streaming example +```python showLineNumbers title="Example using LiteLLM Python SDK" +import litellm +response = await litellm.anthropic.messages.acreate( + messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}], + api_key=api_key, + model="anthropic/claude-3-haiku-20240307", + max_tokens=100, + stream=True, +) +async for chunk in response: + print(chunk) +``` + +### LiteLLM Proxy Server - - 1. Setup config.yaml @@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml 3. Test it! -```bash + + + +```python showLineNumbers title="Example using LiteLLM Proxy Server" +import anthropic + +# point anthropic sdk to litellm proxy +client = anthropic.Anthropic( + base_url="http://0.0.0.0:4000", + api_key="sk-1234", +) + +response = client.messages.create( + messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}], + model="anthropic/claude-3-haiku-20240307", + max_tokens=100, +) +``` + + + +```bash showLineNumbers title="Example using LiteLLM Proxy Server" curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \ -H 'content-type: application/json' \ -H 'x-api-key: $LITELLM_API_KEY' \ @@ -52,41 +126,176 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \ "messages": [ { "role": "user", - "content": [ - { - "type": "text", - "text": "List 5 important events in the XIX century" - } - ] + "content": "Hello, can you tell me a short joke?" } ], - "max_tokens": 4096 + "max_tokens": 100 }' ``` + - + -```python -from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages -import asyncio -import os -# set env -os.environ["ANTHROPIC_API_KEY"] = "my-api-key" +## Request Format +--- -messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}] +Request body will be in the Anthropic messages API format. 
**litellm follows the Anthropic messages specification for this endpoint.** -# Call the handler -async def call(): - response = await anthropic_messages( - messages=messages, - api_key=api_key, - model="claude-3-haiku-20240307", - max_tokens=100, - ) +#### Example request body -asyncio.run(call()) +```json +{ + "model": "claude-3-7-sonnet-20250219", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "Hello, world" + } + ] +} ``` - - \ No newline at end of file +#### Required Fields +- **model** (string): + The model identifier (e.g., `"claude-3-7-sonnet-20250219"`). +- **max_tokens** (integer): + The maximum number of tokens to generate before stopping. + _Note: The model may stop before reaching this limit; value must be greater than 1._ +- **messages** (array of objects): + An ordered list of conversational turns. + Each message object must include: + - **role** (enum: `"user"` or `"assistant"`): + Specifies the speaker of the message. + - **content** (string or array of content blocks): + The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message. + _Example equivalence:_ + ```json + {"role": "user", "content": "Hello, Claude"} + ``` + is equivalent to: + ```json + {"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]} + ``` + +#### Optional Fields +- **metadata** (object): + Contains additional metadata about the request (e.g., `user_id` as an opaque identifier). +- **stop_sequences** (array of strings): + Custom sequences that, when encountered in the generated text, cause the model to stop. +- **stream** (boolean): + Indicates whether to stream the response using server-sent events. +- **system** (string or array): + A system prompt providing context or specific instructions to the model. +- **temperature** (number): + Controls randomness in the model’s responses. Valid range: `0 < temperature < 1`. +- **thinking** (object): + Configuration for enabling extended thinking. If enabled, it includes: + - **budget_tokens** (integer): + Minimum of 1024 tokens (and less than `max_tokens`). + - **type** (enum): + E.g., `"enabled"`. +- **tool_choice** (object): + Instructs how the model should utilize any provided tools. +- **tools** (array of objects): + Definitions for tools available to the model. Each tool includes: + - **name** (string): + The tool’s name. + - **description** (string): + A detailed description of the tool. + - **input_schema** (object): + A JSON schema describing the expected input format for the tool. +- **top_k** (integer): + Limits sampling to the top K options. +- **top_p** (number): + Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`. + + +## Response Format +--- + +Responses will be in the Anthropic messages API format. + +#### Example Response + +```json +{ + "content": [ + { + "text": "Hi! My name is Claude.", + "type": "text" + } + ], + "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF", + "model": "claude-3-7-sonnet-20250219", + "role": "assistant", + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "input_tokens": 2095, + "output_tokens": 503, + "cache_creation_input_tokens": 2095, + "cache_read_input_tokens": 0 + } +} +``` + +#### Response fields + +- **content** (array of objects): + Contains the generated content blocks from the model. Each block includes: + - **type** (string): + Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`). 
+ - **text** (string): + The generated text from the model. + _Note: Maximum length is 5,000,000 characters._ + - **citations** (array of objects or `null`): + Optional field providing citation details. Each citation includes: + - **cited_text** (string): + The excerpt being cited. + - **document_index** (integer): + An index referencing the cited document. + - **document_title** (string or `null`): + The title of the cited document. + - **start_char_index** (integer): + The starting character index for the citation. + - **end_char_index** (integer): + The ending character index for the citation. + - **type** (string): + Typically `"char_location"`. + +- **id** (string): + A unique identifier for the response message. + _Note: The format and length of IDs may change over time._ + +- **model** (string): + Specifies the model that generated the response. + +- **role** (string): + Indicates the role of the generated message. For responses, this is always `"assistant"`. + +- **stop_reason** (string): + Explains why the model stopped generating text. Possible values include: + - `"end_turn"`: The model reached a natural stopping point. + - `"max_tokens"`: The generation stopped because the maximum token limit was reached. + - `"stop_sequence"`: A custom stop sequence was encountered. + - `"tool_use"`: The model invoked one or more tools. + +- **stop_sequence** (string or `null`): + Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`. + +- **type** (string): + Denotes the type of response object, which is always `"message"`. + +- **usage** (object): + Provides details on token usage for billing and rate limiting. This includes: + - **input_tokens** (integer): + Total number of input tokens processed. + - **output_tokens** (integer): + Total number of output tokens generated. + - **cache_creation_input_tokens** (integer or `null`): + Number of tokens used to create a cache entry. + - **cache_read_input_tokens** (integer or `null`): + Number of tokens read from the cache. 
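
To tie the request and response formats above together, here is a hedged sketch of calling the new interface with a tool definition and reading a `tool_use` block back. The weather-tool name and JSON schema are illustrative assumptions (not part of this PR), and the sketch assumes the non-streaming call returns a plain dict in the Anthropic format shown above, as the interface docstring suggests.

```python showLineNumbers title="Sketch: tool use via the /v1/messages interface"
import asyncio
import os

import litellm

# Illustrative tool definition; the name and schema are assumptions for this sketch,
# not something defined by LiteLLM or this PR.
weather_tool = {
    "name": "get_weather",
    "description": "Get the current weather for a given city",
    "input_schema": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}


async def main():
    # Same interface as the non-streaming example above, with the optional
    # `tools` / `tool_choice` fields from the request format section filled in.
    response = await litellm.anthropic.messages.acreate(
        model="anthropic/claude-3-haiku-20240307",
        api_key=os.getenv("ANTHROPIC_API_KEY"),
        max_tokens=1024,
        messages=[{"role": "user", "content": "What is the weather in Paris?"}],
        tools=[weather_tool],
        tool_choice={"type": "auto"},
    )

    # Assumes the response is a dict in the Anthropic format shown above:
    # a tool call surfaces as a "tool_use" content block, plain text as a "text" block.
    for block in response.get("content") or []:
        if block.get("type") == "tool_use":
            print("tool call:", block.get("name"), block.get("input"))
        elif block.get("type") == "text":
            print("text:", block.get("text"))


asyncio.run(main())
```

With `stream=True` the same call instead yields chunks asynchronously, as in the streaming example above.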
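Because the LiteLLM proxy exposes the same `/v1/messages` spec, the official Anthropic SDK's streaming helper can also be pointed at it. A hedged sketch follows: the `base_url`, `api_key`, and `anthropic-claude` model alias reuse the proxy example values above, and `client.messages.stream` / `text_stream` are standard Anthropic SDK usage rather than anything added by this PR.

```python showLineNumbers title="Sketch: streaming through the LiteLLM proxy with the Anthropic SDK"
import anthropic

# Point the official Anthropic SDK at the LiteLLM proxy (same values as the proxy example above)
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

# The SDK's streaming helper consumes the server-sent events enabled by the `stream` field above
with client.messages.stream(
    model="anthropic-claude",
    max_tokens=100,
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
    final_message = stream.get_final_message()

print()
print(final_message.stop_reason)  # e.g. "end_turn", per the response fields above
```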
diff --git a/litellm/__init__.py b/litellm/__init__.py index c2e366e2b1..9997b9a8ac 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token ### ADAPTERS ### from .types.adapter import AdapterItem +import litellm.anthropic_interface as anthropic adapters: List[AdapterItem] = [] diff --git a/litellm/anthropic_interface/__init__.py b/litellm/anthropic_interface/__init__.py new file mode 100644 index 0000000000..9902fdc553 --- /dev/null +++ b/litellm/anthropic_interface/__init__.py @@ -0,0 +1,6 @@ +""" +Anthropic module for LiteLLM +""" +from .messages import acreate, create + +__all__ = ["acreate", "create"] diff --git a/litellm/anthropic_interface/messages/__init__.py b/litellm/anthropic_interface/messages/__init__.py new file mode 100644 index 0000000000..f3249f981b --- /dev/null +++ b/litellm/anthropic_interface/messages/__init__.py @@ -0,0 +1,117 @@ +""" +Interface for Anthropic's messages API + +Use this to call LLMs in Anthropic /messages Request/Response format + +This is an __init__.py file to allow the following interface + +- litellm.messages.acreate +- litellm.messages.create + +""" + +from typing import AsyncIterator, Dict, Iterator, List, Optional, Union + +from litellm.llms.anthropic.experimental_pass_through.messages.handler import ( + anthropic_messages as _async_anthropic_messages, +) +from litellm.types.llms.anthropic_messages.anthropic_response import ( + AnthropicMessagesResponse, +) + + +async def acreate( + max_tokens: int, + messages: List[Dict], + model: str, + metadata: Optional[Dict] = None, + stop_sequences: Optional[List[str]] = None, + stream: Optional[bool] = False, + system: Optional[str] = None, + temperature: Optional[float] = 1.0, + thinking: Optional[Dict] = None, + tool_choice: Optional[Dict] = None, + tools: Optional[List[Dict]] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs +) -> Union[AnthropicMessagesResponse, AsyncIterator]: + """ + Async wrapper for Anthropic's messages API + + Args: + max_tokens (int): Maximum tokens to generate (required) + messages (List[Dict]): List of message objects with role and content (required) + model (str): Model name to use (required) + metadata (Dict, optional): Request metadata + stop_sequences (List[str], optional): Custom stop sequences + stream (bool, optional): Whether to stream the response + system (str, optional): System prompt + temperature (float, optional): Sampling temperature (0.0 to 1.0) + thinking (Dict, optional): Extended thinking configuration + tool_choice (Dict, optional): Tool choice configuration + tools (List[Dict], optional): List of tool definitions + top_k (int, optional): Top K sampling parameter + top_p (float, optional): Nucleus sampling parameter + **kwargs: Additional arguments + + Returns: + Dict: Response from the API + """ + return await _async_anthropic_messages( + max_tokens=max_tokens, + messages=messages, + model=model, + metadata=metadata, + stop_sequences=stop_sequences, + stream=stream, + system=system, + temperature=temperature, + thinking=thinking, + tool_choice=tool_choice, + tools=tools, + top_k=top_k, + top_p=top_p, + **kwargs, + ) + + +async def create( + max_tokens: int, + messages: List[Dict], + model: str, + metadata: Optional[Dict] = None, + stop_sequences: Optional[List[str]] = None, + stream: Optional[bool] = False, + system: Optional[str] = None, + temperature: Optional[float] = 1.0, + thinking: Optional[Dict] = None, + tool_choice: 
Optional[Dict] = None, + tools: Optional[List[Dict]] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs +) -> Union[AnthropicMessagesResponse, Iterator]: + """ + Async wrapper for Anthropic's messages API + + Args: + max_tokens (int): Maximum tokens to generate (required) + messages (List[Dict]): List of message objects with role and content (required) + model (str): Model name to use (required) + metadata (Dict, optional): Request metadata + stop_sequences (List[str], optional): Custom stop sequences + stream (bool, optional): Whether to stream the response + system (str, optional): System prompt + temperature (float, optional): Sampling temperature (0.0 to 1.0) + thinking (Dict, optional): Extended thinking configuration + tool_choice (Dict, optional): Tool choice configuration + tools (List[Dict], optional): List of tool definitions + top_k (int, optional): Top K sampling parameter + top_p (float, optional): Nucleus sampling parameter + **kwargs: Additional arguments + + Returns: + Dict: Response from the API + """ + raise NotImplementedError("This function is not implemented") diff --git a/litellm/anthropic_interface/readme.md b/litellm/anthropic_interface/readme.md new file mode 100644 index 0000000000..01c5f1b7c3 --- /dev/null +++ b/litellm/anthropic_interface/readme.md @@ -0,0 +1,116 @@ +## Use LLM API endpoints in Anthropic Interface + +Note: This is called `anthropic_interface` because `anthropic` is a known python package and was failing mypy type checking. + + +## Usage +--- + +### LiteLLM Python SDK + +#### Non-streaming example +```python showLineNumbers title="Example using LiteLLM Python SDK" +import litellm +response = await litellm.anthropic.messages.acreate( + messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}], + api_key=api_key, + model="anthropic/claude-3-haiku-20240307", + max_tokens=100, +) +``` + +Example response: +```json +{ + "content": [ + { + "text": "Hi! this is a very short joke", + "type": "text" + } + ], + "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF", + "model": "claude-3-7-sonnet-20250219", + "role": "assistant", + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "input_tokens": 2095, + "output_tokens": 503, + "cache_creation_input_tokens": 2095, + "cache_read_input_tokens": 0 + } +} +``` + +#### Streaming example +```python showLineNumbers title="Example using LiteLLM Python SDK" +import litellm +response = await litellm.anthropic.messages.acreate( + messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}], + api_key=api_key, + model="anthropic/claude-3-haiku-20240307", + max_tokens=100, + stream=True, +) +async for chunk in response: + print(chunk) +``` + +### LiteLLM Proxy Server + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: anthropic-claude + litellm_params: + model: claude-3-7-sonnet-latest +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! 
+ + + + +```python showLineNumbers title="Example using LiteLLM Proxy Server" +import anthropic + +# point anthropic sdk to litellm proxy +client = anthropic.Anthropic( + base_url="http://0.0.0.0:4000", + api_key="sk-1234", +) + +response = client.messages.create( + messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}], + model="anthropic/claude-3-haiku-20240307", + max_tokens=100, +) +``` + + + +```bash showLineNumbers title="Example using LiteLLM Proxy Server" +curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \ +-H 'content-type: application/json' \ +-H 'x-api-key: $LITELLM_API_KEY' \ +-H 'anthropic-version: 2023-06-01' \ +-d '{ + "model": "anthropic-claude", + "messages": [ + { + "role": "user", + "content": "Hello, can you tell me a short joke?" + } + ], + "max_tokens": 100 +}' +``` \ No newline at end of file diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py index 099a2acdae..a37d816770 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py @@ -6,7 +6,7 @@ """ import json -from typing import Any, AsyncIterator, Dict, Optional, Union, cast +from typing import AsyncIterator, Dict, List, Optional, Union, cast import httpx @@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, get_async_httpx_client, ) +from litellm.types.llms.anthropic_messages.anthropic_response import ( + AnthropicMessagesResponse, +) from litellm.types.router import GenericLiteLLMParams from litellm.types.utils import ProviderSpecificHeader from litellm.utils import ProviderConfigManager, client @@ -60,14 +63,25 @@ class AnthropicMessagesHandler: @client async def anthropic_messages( - api_key: str, + max_tokens: int, + messages: List[Dict], model: str, - stream: bool = False, + metadata: Optional[Dict] = None, + stop_sequences: Optional[List[str]] = None, + stream: Optional[bool] = False, + system: Optional[str] = None, + temperature: Optional[float] = None, + thinking: Optional[Dict] = None, + tool_choice: Optional[Dict] = None, + tools: Optional[List[Dict]] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + api_key: Optional[str] = None, api_base: Optional[str] = None, client: Optional[AsyncHTTPHandler] = None, custom_llm_provider: Optional[str] = None, **kwargs, -) -> Union[Dict[str, Any], AsyncIterator]: +) -> Union[AnthropicMessagesResponse, AsyncIterator]: """ Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec """ @@ -129,10 +143,8 @@ async def anthropic_messages( }, custom_llm_provider=_custom_llm_provider, ) - litellm_logging_obj.model_call_details.update(kwargs) - # Prepare request body - request_body = kwargs.copy() + request_body = locals().copy() request_body = { k: v for k, v in request_body.items() @@ -140,10 +152,12 @@ async def anthropic_messages( in anthropic_messages_provider_config.get_supported_anthropic_messages_params( model=model ) + and v is not None } request_body["stream"] = stream request_body["model"] = model litellm_logging_obj.stream = stream + litellm_logging_obj.model_call_details.update(request_body) # Make the request request_url = anthropic_messages_provider_config.get_complete_url( @@ -164,7 +178,7 @@ async def anthropic_messages( url=request_url, headers=headers, data=json.dumps(request_body), - stream=stream, + stream=stream or False, ) response.raise_for_status() diff --git 
a/litellm/types/llms/anthropic_messages/anthropic_response.py b/litellm/types/llms/anthropic_messages/anthropic_response.py new file mode 100644 index 0000000000..270807fc8f --- /dev/null +++ b/litellm/types/llms/anthropic_messages/anthropic_response.py @@ -0,0 +1,83 @@ +from typing import Any, Dict, List, Literal, Optional, TypedDict, Union + +from typing_extensions import TypeAlias + + +class AnthropicResponseTextBlock(TypedDict, total=False): + """ + Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages + """ + + citations: Optional[List[Dict[str, Any]]] + text: str + type: Literal["text"] + + +class AnthropicResponseToolUseBlock(TypedDict, total=False): + """ + Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages + """ + + id: Optional[str] + input: Optional[str] + name: Optional[str] + type: Literal["tool_use"] + + +class AnthropicResponseThinkingBlock(TypedDict, total=False): + """ + Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages + """ + + signature: Optional[str] + thinking: Optional[str] + type: Literal["thinking"] + + +class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False): + """ + Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages + """ + + data: Optional[str] + type: Literal["redacted_thinking"] + + +AnthropicResponseContentBlock: TypeAlias = Union[ + AnthropicResponseTextBlock, + AnthropicResponseToolUseBlock, + AnthropicResponseThinkingBlock, + AnthropicResponseRedactedThinkingBlock, +] + + +class AnthropicUsage(TypedDict, total=False): + """ + Input and output tokens used in the request + """ + + input_tokens: int + output_tokens: int + + """ + Cache Tokens Used + """ + cache_creation_input_tokens: int + cache_read_input_tokens: int + + +class AnthropicMessagesResponse(TypedDict, total=False): + """ + Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages + """ + + content: Optional[List[AnthropicResponseContentBlock]] + id: str + model: Optional[str] # This represents the Model type from Anthropic + role: Optional[Literal["assistant"]] + stop_reason: Optional[ + Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"] + ] + stop_sequence: Optional[str] + type: Optional[Literal["message"]] + usage: Optional[AnthropicUsage] diff --git a/mypy.ini b/mypy.ini index 19ead3ba7d..3ce8c5fcc0 100644 --- a/mypy.ini +++ b/mypy.ini @@ -2,6 +2,7 @@ warn_return_any = False ignore_missing_imports = True mypy_path = litellm/stubs +namespace_packages = True [mypy-google.*] ignore_missing_imports = True diff --git a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py index b5b3302acc..ec268b1a24 100644 --- a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py +++ b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py @@ -8,7 +8,7 @@ import unittest.mock from unittest.mock import AsyncMock, MagicMock sys.path.insert( - 0, os.path.abspath("../..") + 0, os.path.abspath("../../..") ) # Adds the parent directory to the system path import litellm import pytest @@ -16,6 +16,7 @@ from dotenv import load_dotenv from litellm.llms.anthropic.experimental_pass_through.messages.handler import ( anthropic_messages, ) + from typing import Optional from litellm.types.utils import StandardLoggingPayload from litellm.integrations.custom_logger import CustomLogger @@ -73,6 +74,7 @@ async def test_anthropic_messages_non_streaming(): """ Test the 
anthropic_messages with non-streaming request """ + litellm._turn_on_debug() # Get API key from environment api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: @@ -82,7 +84,7 @@ async def test_anthropic_messages_non_streaming(): messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}] # Call the handler - response = await anthropic_messages( + response = await litellm.anthropic.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", @@ -114,7 +116,7 @@ async def test_anthropic_messages_streaming(): # Call the handler async_httpx_client = AsyncHTTPHandler() - response = await anthropic_messages( + response = await litellm.anthropic.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", @@ -134,7 +136,7 @@ async def test_anthropic_messages_streaming_with_bad_request(): Test the anthropic_messages with streaming request """ try: - response = await anthropic_messages( + response = await litellm.anthropic.messages.acreate( messages=["hi"], api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-haiku-20240307", @@ -458,7 +460,7 @@ async def test_anthropic_messages_with_extra_headers(): mock_client.post = AsyncMock(return_value=mock_response) # Call the handler with extra_headers and our mocked client - response = await anthropic_messages( + response = await litellm.anthropic.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307",