Merge pull request #9567 from BerriAI/litellm_anthropic_messages_improvements

[Refactor] - Expose litellm.messages.acreate() and litellm.messages.create() to make LLM API calls in Anthropic API spec
This commit is contained in:
Ishaan Jaff 2025-03-31 20:50:30 -07:00 committed by GitHub
commit bc5cc51b9d
9 changed files with 593 additions and 44 deletions


@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem';
# /v1/messages [BETA]
LiteLLM provides a BETA endpoint that follows the spec of Anthropic's `/v1/messages` endpoint.
Use LiteLLM to call all your LLM APIs in the Anthropic `v1/messages` format.
This endpoint currently supports only the Anthropic API.
## Overview
| Feature | Supported | Notes |
|-------|-------|-------|
@ -21,9 +22,61 @@ Planned improvement:
- Bedrock Anthropic support
## Usage
---
### LiteLLM Python SDK
#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import os

import litellm

api_key = os.getenv("ANTHROPIC_API_KEY")

response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```
Example response:
```json
{
  "content": [
    {
      "text": "Hi! this is a very short joke",
      "type": "text"
    }
  ],
  "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
  "model": "claude-3-7-sonnet-20250219",
  "role": "assistant",
  "stop_reason": "end_turn",
  "stop_sequence": null,
  "type": "message",
  "usage": {
    "input_tokens": 2095,
    "output_tokens": 503,
    "cache_creation_input_tokens": 2095,
    "cache_read_input_tokens": 0
  }
}
```
#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import os

import litellm

api_key = os.getenv("ANTHROPIC_API_KEY")

response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
    stream=True,
)
async for chunk in response:
    print(chunk)
```
### LiteLLM Proxy Server
<Tabs>
<TabItem label="PROXY" value="proxy">
1. Setup config.yaml
@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml
3. Test it!
<Tabs>
<TabItem label="Anthropic Python SDK" value="python">
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic
# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

response = client.messages.create(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
@ -52,41 +126,176 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
  "messages": [
    {
      "role": "user",
      "content": "Hello, can you tell me a short joke?"
    }
  ],
  "max_tokens": 100
}'
```
</TabItem>
</Tabs>

## Request Format
---

Request body will be in the Anthropic messages API format. **litellm follows the Anthropic messages specification for this endpoint.**

#### Example request body

```json
{
  "model": "claude-3-7-sonnet-20250219",
  "max_tokens": 1024,
  "messages": [
    {
      "role": "user",
      "content": "Hello, world"
    }
  ]
}
```
#### Required Fields
- **model** (string):
The model identifier (e.g., `"claude-3-7-sonnet-20250219"`).
- **max_tokens** (integer):
The maximum number of tokens to generate before stopping.
_Note: The model may stop before reaching this limit; value must be greater than 1._
- **messages** (array of objects):
An ordered list of conversational turns.
Each message object must include:
- **role** (enum: `"user"` or `"assistant"`):
Specifies the speaker of the message.
- **content** (string or array of content blocks):
The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message.
_Example equivalence:_
```json
{"role": "user", "content": "Hello, Claude"}
```
is equivalent to:
```json
{"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]}
```
#### Optional Fields
- **metadata** (object):
Contains additional metadata about the request (e.g., `user_id` as an opaque identifier).
- **stop_sequences** (array of strings):
Custom sequences that, when encountered in the generated text, cause the model to stop.
- **stream** (boolean):
Indicates whether to stream the response using server-sent events.
- **system** (string or array):
A system prompt providing context or specific instructions to the model.
- **temperature** (number):
Controls randomness in the model's responses. Valid range: `0 < temperature < 1`.
- **thinking** (object):
Configuration for enabling extended thinking. If enabled, it includes:
- **budget_tokens** (integer):
Minimum of 1024 tokens (and less than `max_tokens`).
- **type** (enum):
E.g., `"enabled"`.
- **tool_choice** (object):
Instructs how the model should utilize any provided tools.
- **tools** (array of objects):
Definitions for tools available to the model. Each tool includes:
- **name** (string):
The tool's name.
- **description** (string):
A detailed description of the tool.
- **input_schema** (object):
A JSON schema describing the expected input format for the tool.
- **top_k** (integer):
Limits sampling to the top K options.
- **top_p** (number):
Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`.
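
Putting several of the optional fields above together, a request might look like the sketch below (illustrative values only; assumes `ANTHROPIC_API_KEY` is set in the environment):

```python showLineNumbers title="Sketch: request using optional fields"
import os

import litellm

# Illustrative values; parameter names follow the Anthropic spec described above.
response = await litellm.anthropic.messages.acreate(
    model="anthropic/claude-3-7-sonnet-20250219",
    max_tokens=2048,
    messages=[{"role": "user", "content": "Summarize the French Revolution."}],
    api_key=os.getenv("ANTHROPIC_API_KEY"),
    system="You are a concise history tutor.",  # system prompt
    temperature=0.5,  # 0 < temperature < 1
    stop_sequences=["END"],
    metadata={"user_id": "user-123"},  # opaque identifier
)
```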
## Response Format
---
Responses will be in the Anthropic messages API format.
#### Example Response
```json
{
  "content": [
    {
      "text": "Hi! My name is Claude.",
      "type": "text"
    }
  ],
  "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
  "model": "claude-3-7-sonnet-20250219",
  "role": "assistant",
  "stop_reason": "end_turn",
  "stop_sequence": null,
  "type": "message",
  "usage": {
    "input_tokens": 2095,
    "output_tokens": 503,
    "cache_creation_input_tokens": 2095,
    "cache_read_input_tokens": 0
  }
}
```
#### Response fields
- **content** (array of objects):
Contains the generated content blocks from the model. Each block includes:
- **type** (string):
Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`).
- **text** (string):
The generated text from the model.
_Note: Maximum length is 5,000,000 characters._
- **citations** (array of objects or `null`):
Optional field providing citation details. Each citation includes:
- **cited_text** (string):
The excerpt being cited.
- **document_index** (integer):
An index referencing the cited document.
- **document_title** (string or `null`):
The title of the cited document.
- **start_char_index** (integer):
The starting character index for the citation.
- **end_char_index** (integer):
The ending character index for the citation.
- **type** (string):
Typically `"char_location"`.
- **id** (string):
A unique identifier for the response message.
_Note: The format and length of IDs may change over time._
- **model** (string):
Specifies the model that generated the response.
- **role** (string):
Indicates the role of the generated message. For responses, this is always `"assistant"`.
- **stop_reason** (string):
Explains why the model stopped generating text. Possible values include:
- `"end_turn"`: The model reached a natural stopping point.
- `"max_tokens"`: The generation stopped because the maximum token limit was reached.
- `"stop_sequence"`: A custom stop sequence was encountered.
- `"tool_use"`: The model invoked one or more tools.
- **stop_sequence** (string or `null`):
Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`.
- **type** (string):
Denotes the type of response object, which is always `"message"`.
- **usage** (object):
Provides details on token usage for billing and rate limiting. This includes:
- **input_tokens** (integer):
Total number of input tokens processed.
- **output_tokens** (integer):
Total number of output tokens generated.
- **cache_creation_input_tokens** (integer or `null`):
Number of tokens used to create a cache entry.
- **cache_read_input_tokens** (integer or `null`):
Number of tokens read from the cache.
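
Since the response is plain Anthropic-spec JSON, downstream code can read these fields directly. A minimal sketch (assuming the response has already been parsed into a `dict`):

```python showLineNumbers title="Sketch: reading response fields"
def summarize_response(resp: dict) -> str:
    # Concatenate the text blocks from `content`
    text = "".join(
        block["text"]
        for block in resp.get("content") or []
        if block.get("type") == "text"
    )
    # `usage` drives billing and rate limiting
    usage = resp.get("usage") or {}
    total = usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
    if resp.get("stop_reason") == "max_tokens":
        text += " [truncated]"
    return f"{text} ({total} tokens)"
```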


@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token
### ADAPTERS ###
from .types.adapter import AdapterItem
import litellm.anthropic_interface as anthropic
adapters: List[AdapterItem] = []


@ -0,0 +1,6 @@
"""
Anthropic module for LiteLLM
"""
from .messages import acreate, create
__all__ = ["acreate", "create"]
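
With the `import litellm.anthropic_interface as anthropic` alias added to `litellm/__init__.py` above, both import paths resolve to the same module. A quick sketch of the resulting surface:

```python
import litellm
import litellm.anthropic_interface as anthropic_interface

# The alias in litellm/__init__.py makes these the same module object
assert litellm.anthropic is anthropic_interface

# The async entry point exported by this __init__.py
acreate = litellm.anthropic.messages.acreate
```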


@ -0,0 +1,117 @@
"""
Interface for Anthropic's messages API
Use this to call LLMs in Anthropic /messages Request/Response format
This is an __init__.py file to allow the following interface
- litellm.messages.acreate
- litellm.messages.create
"""
from typing import AsyncIterator, Dict, Iterator, List, Optional, Union
from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
    anthropic_messages as _async_anthropic_messages,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)
async def acreate(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = 1.0,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    **kwargs
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
    """
    Async wrapper for Anthropic's messages API

    Args:
        max_tokens (int): Maximum tokens to generate (required)
        messages (List[Dict]): List of message objects with role and content (required)
        model (str): Model name to use (required)
        metadata (Dict, optional): Request metadata
        stop_sequences (List[str], optional): Custom stop sequences
        stream (bool, optional): Whether to stream the response
        system (str, optional): System prompt
        temperature (float, optional): Sampling temperature (0.0 to 1.0)
        thinking (Dict, optional): Extended thinking configuration
        tool_choice (Dict, optional): Tool choice configuration
        tools (List[Dict], optional): List of tool definitions
        top_k (int, optional): Top K sampling parameter
        top_p (float, optional): Nucleus sampling parameter
        **kwargs: Additional arguments

    Returns:
        AnthropicMessagesResponse, or an AsyncIterator of chunks when `stream=True`
    """
    return await _async_anthropic_messages(
        max_tokens=max_tokens,
        messages=messages,
        model=model,
        metadata=metadata,
        stop_sequences=stop_sequences,
        stream=stream,
        system=system,
        temperature=temperature,
        thinking=thinking,
        tool_choice=tool_choice,
        tools=tools,
        top_k=top_k,
        top_p=top_p,
        **kwargs,
    )


def create(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = 1.0,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    **kwargs
) -> Union[AnthropicMessagesResponse, Iterator]:
    """
    Synchronous wrapper for Anthropic's messages API

    Args:
        max_tokens (int): Maximum tokens to generate (required)
        messages (List[Dict]): List of message objects with role and content (required)
        model (str): Model name to use (required)
        metadata (Dict, optional): Request metadata
        stop_sequences (List[str], optional): Custom stop sequences
        stream (bool, optional): Whether to stream the response
        system (str, optional): System prompt
        temperature (float, optional): Sampling temperature (0.0 to 1.0)
        thinking (Dict, optional): Extended thinking configuration
        tool_choice (Dict, optional): Tool choice configuration
        tools (List[Dict], optional): List of tool definitions
        top_k (int, optional): Top K sampling parameter
        top_p (float, optional): Nucleus sampling parameter
        **kwargs: Additional arguments

    Returns:
        AnthropicMessagesResponse, or an Iterator of chunks when `stream=True`
    """
    raise NotImplementedError("This function is not implemented")
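
Usage sketch for the async entry point, driven from synchronous code with `asyncio.run` (assumes `ANTHROPIC_API_KEY` is exported):

```python
import asyncio
import os

import litellm


async def main() -> None:
    response = await litellm.anthropic.messages.acreate(
        messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
        api_key=os.getenv("ANTHROPIC_API_KEY"),
        model="anthropic/claude-3-haiku-20240307",
        max_tokens=100,
    )
    print(response)


asyncio.run(main())
```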


@ -0,0 +1,116 @@
## Use LLM API endpoints in the Anthropic Interface

Note: This module is called `anthropic_interface` because `anthropic` is an existing Python package, and reusing that name broke mypy type checking.
## Usage
---
### LiteLLM Python SDK
#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import os

import litellm

api_key = os.getenv("ANTHROPIC_API_KEY")

response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```
Example response:
```json
{
  "content": [
    {
      "text": "Hi! this is a very short joke",
      "type": "text"
    }
  ],
  "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
  "model": "claude-3-7-sonnet-20250219",
  "role": "assistant",
  "stop_reason": "end_turn",
  "stop_sequence": null,
  "type": "message",
  "usage": {
    "input_tokens": 2095,
    "output_tokens": 503,
    "cache_creation_input_tokens": 2095,
    "cache_read_input_tokens": 0
  }
}
```
#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import os

import litellm

api_key = os.getenv("ANTHROPIC_API_KEY")

response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
    stream=True,
)
async for chunk in response:
    print(chunk)
```
### LiteLLM Proxy Server
1. Setup config.yaml
```yaml
model_list:
  - model_name: anthropic-claude
    litellm_params:
      model: claude-3-7-sonnet-latest
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
<Tabs>
<TabItem label="Anthropic Python SDK" value="python">
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic
# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

response = client.messages.create(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
-H 'anthropic-version: 2023-06-01' \
-d '{
  "model": "anthropic-claude",
  "messages": [
    {
      "role": "user",
      "content": "Hello, can you tell me a short joke?"
    }
  ],
  "max_tokens": 100
}'
```


@ -6,7 +6,7 @@
"""
import json
from typing import AsyncIterator, Dict, List, Optional, Union, cast
import httpx
@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
    AsyncHTTPHandler,
    get_async_httpx_client,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import ProviderSpecificHeader
from litellm.utils import ProviderConfigManager, client
@ -60,14 +63,25 @@ class AnthropicMessagesHandler:
@client
async def anthropic_messages(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = None,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    client: Optional[AsyncHTTPHandler] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs,
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec
"""
@ -129,10 +143,8 @@ async def anthropic_messages(
        },
        custom_llm_provider=_custom_llm_provider,
    )

    # Prepare request body
    request_body = locals().copy()
    request_body = {
        k: v
        for k, v in request_body.items()
@ -140,10 +152,12 @@ async def anthropic_messages(
        in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
            model=model
        )
        and v is not None
    }
    request_body["stream"] = stream
    request_body["model"] = model
    litellm_logging_obj.stream = stream
    litellm_logging_obj.model_call_details.update(request_body)
    # Make the request
    request_url = anthropic_messages_provider_config.get_complete_url(
@ -164,7 +178,7 @@ async def anthropic_messages(
        url=request_url,
        headers=headers,
        data=json.dumps(request_body),
        stream=stream or False,
    )
    response.raise_for_status()
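
The switch from `kwargs.copy()` to `locals().copy()` above is what lets the now-named parameters (`max_tokens`, `system`, `temperature`, ...) flow into the request body before it is filtered down to the provider's supported params. A standalone sketch of that pattern (names here are illustrative, not the litellm internals):

```python
from typing import Any, Dict, Optional


def build_request_body(
    model: str,
    max_tokens: int,
    temperature: Optional[float] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    body = locals().copy()           # named params plus the `kwargs` dict itself
    body.update(body.pop("kwargs"))  # flatten extra kwargs into the body
    # stand-in for get_supported_anthropic_messages_params(); illustrative only
    supported = {"model", "max_tokens", "temperature"}
    return {k: v for k, v in body.items() if k in supported and v is not None}


print(build_request_body("claude-3-haiku-20240307", 100, extra="ignored"))
# -> {'model': 'claude-3-haiku-20240307', 'max_tokens': 100}
```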


@ -0,0 +1,83 @@
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
from typing_extensions import TypeAlias
class AnthropicResponseTextBlock(TypedDict, total=False):
    """
    Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
    """

    citations: Optional[List[Dict[str, Any]]]
    text: str
    type: Literal["text"]


class AnthropicResponseToolUseBlock(TypedDict, total=False):
    """
    Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
    """

    id: Optional[str]
    input: Optional[str]
    name: Optional[str]
    type: Literal["tool_use"]


class AnthropicResponseThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
    """

    signature: Optional[str]
    thinking: Optional[str]
    type: Literal["thinking"]


class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
    """

    data: Optional[str]
    type: Literal["redacted_thinking"]


AnthropicResponseContentBlock: TypeAlias = Union[
    AnthropicResponseTextBlock,
    AnthropicResponseToolUseBlock,
    AnthropicResponseThinkingBlock,
    AnthropicResponseRedactedThinkingBlock,
]


class AnthropicUsage(TypedDict, total=False):
    """
    Input and output tokens used in the request
    """

    input_tokens: int
    output_tokens: int

    """
    Cache Tokens Used
    """
    cache_creation_input_tokens: int
    cache_read_input_tokens: int


class AnthropicMessagesResponse(TypedDict, total=False):
    """
    Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
    """

    content: Optional[List[AnthropicResponseContentBlock]]
    id: str
    model: Optional[str]  # This represents the Model type from Anthropic
    role: Optional[Literal["assistant"]]
    stop_reason: Optional[
        Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
    ]
    stop_sequence: Optional[str]
    type: Optional[Literal["message"]]
    usage: Optional[AnthropicUsage]
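
Because these TypedDicts are declared with `total=False`, callers can type partially-populated responses without supplying every key. A small sketch:

```python
from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)

# Partial construction type-checks since the TypedDicts use total=False
resp: AnthropicMessagesResponse = {
    "id": "msg_sketch",  # illustrative id
    "role": "assistant",
    "stop_reason": "end_turn",
    "usage": {"input_tokens": 10, "output_tokens": 5},
}
```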


@ -2,6 +2,7 @@
warn_return_any = False
ignore_missing_imports = True
mypy_path = litellm/stubs
namespace_packages = True
[mypy-google.*]
ignore_missing_imports = True


@ -8,7 +8,7 @@ import unittest.mock
from unittest.mock import AsyncMock, MagicMock
sys.path.insert(
    0, os.path.abspath("../../..")
)  # Adds the parent directory to the system path
import litellm
import pytest
@ -16,6 +16,7 @@ from dotenv import load_dotenv
from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
    anthropic_messages,
)
from typing import Optional
from litellm.types.utils import StandardLoggingPayload
from litellm.integrations.custom_logger import CustomLogger
@ -73,6 +74,7 @@ async def test_anthropic_messages_non_streaming():
"""
Test the anthropic_messages with non-streaming request
"""
litellm._turn_on_debug()
# Get API key from environment
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
@ -82,7 +84,7 @@ async def test_anthropic_messages_non_streaming():
    messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]

    # Call the handler
    response = await litellm.anthropic.messages.acreate(
        messages=messages,
        api_key=api_key,
        model="claude-3-haiku-20240307",
@ -114,7 +116,7 @@ async def test_anthropic_messages_streaming():
    # Call the handler
    async_httpx_client = AsyncHTTPHandler()

    response = await litellm.anthropic.messages.acreate(
        messages=messages,
        api_key=api_key,
        model="claude-3-haiku-20240307",
@ -134,7 +136,7 @@ async def test_anthropic_messages_streaming_with_bad_request():
    Test the anthropic_messages with streaming request
    """
    try:
        response = await litellm.anthropic.messages.acreate(
            messages=["hi"],
            api_key=os.getenv("ANTHROPIC_API_KEY"),
            model="claude-3-haiku-20240307",
@ -458,7 +460,7 @@ async def test_anthropic_messages_with_extra_headers():
    mock_client.post = AsyncMock(return_value=mock_response)

    # Call the handler with extra_headers and our mocked client
    response = await litellm.anthropic.messages.acreate(
        messages=messages,
        api_key=api_key,
        model="claude-3-haiku-20240307",