Merge pull request #9567 from BerriAI/litellm_anthropic_messages_improvements

[Refactor] - Expose litellm.messages.acreate() and litellm.messages.create() to make LLM API calls in the Anthropic API spec
Ishaan Jaff 2025-03-31 20:50:30 -07:00 committed by GitHub
commit bc5cc51b9d
9 changed files with 593 additions and 44 deletions


@@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token
 ### ADAPTERS ###
 from .types.adapter import AdapterItem
+import litellm.anthropic_interface as anthropic
 adapters: List[AdapterItem] = []
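
This one-line import is what exposes the new interface at the top level of the package: `litellm.anthropic` becomes an alias for `litellm.anthropic_interface`. A minimal sanity-check sketch, assuming a litellm install that includes this change:

```python
import litellm

# litellm.anthropic is the alias created by the import above, so the
# messages helpers are reachable without importing the package directly
print(litellm.anthropic.messages.acreate)
print(litellm.anthropic.messages.create)
```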


@@ -0,0 +1,6 @@
"""
Anthropic module for LiteLLM
"""
from .messages import acreate, create
__all__ = ["acreate", "create"]


@@ -0,0 +1,117 @@
"""
Interface for Anthropic's messages API

Use this to call LLMs in Anthropic /messages Request/Response format

This is an __init__.py file to allow the following interface

- litellm.messages.acreate
- litellm.messages.create
"""

from typing import AsyncIterator, Dict, Iterator, List, Optional, Union

from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
    anthropic_messages as _async_anthropic_messages,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)


async def acreate(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = 1.0,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    **kwargs,
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
    """
    Async wrapper for Anthropic's messages API

    Args:
        max_tokens (int): Maximum tokens to generate (required)
        messages (List[Dict]): List of message objects with role and content (required)
        model (str): Model name to use (required)
        metadata (Dict, optional): Request metadata
        stop_sequences (List[str], optional): Custom stop sequences
        stream (bool, optional): Whether to stream the response
        system (str, optional): System prompt
        temperature (float, optional): Sampling temperature (0.0 to 1.0)
        thinking (Dict, optional): Extended thinking configuration
        tool_choice (Dict, optional): Tool choice configuration
        tools (List[Dict], optional): List of tool definitions
        top_k (int, optional): Top K sampling parameter
        top_p (float, optional): Nucleus sampling parameter
        **kwargs: Additional arguments

    Returns:
        AnthropicMessagesResponse: Response from the API, or an AsyncIterator of chunks when streaming
    """
    return await _async_anthropic_messages(
        max_tokens=max_tokens,
        messages=messages,
        model=model,
        metadata=metadata,
        stop_sequences=stop_sequences,
        stream=stream,
        system=system,
        temperature=temperature,
        thinking=thinking,
        tool_choice=tool_choice,
        tools=tools,
        top_k=top_k,
        top_p=top_p,
        **kwargs,
    )


def create(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = 1.0,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    **kwargs,
) -> Union[AnthropicMessagesResponse, Iterator]:
    """
    Synchronous wrapper for Anthropic's messages API

    Args:
        max_tokens (int): Maximum tokens to generate (required)
        messages (List[Dict]): List of message objects with role and content (required)
        model (str): Model name to use (required)
        metadata (Dict, optional): Request metadata
        stop_sequences (List[str], optional): Custom stop sequences
        stream (bool, optional): Whether to stream the response
        system (str, optional): System prompt
        temperature (float, optional): Sampling temperature (0.0 to 1.0)
        thinking (Dict, optional): Extended thinking configuration
        tool_choice (Dict, optional): Tool choice configuration
        tools (List[Dict], optional): List of tool definitions
        top_k (int, optional): Top K sampling parameter
        top_p (float, optional): Nucleus sampling parameter
        **kwargs: Additional arguments

    Returns:
        AnthropicMessagesResponse: Response from the API, or an Iterator of chunks when streaming
    """
    raise NotImplementedError("This function is not implemented")
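
`create` is left unimplemented in this commit. If a synchronous path were needed, one option is a thin shim that drives the async handler to completion; this is only a sketch under the assumption that no event loop is already running, not the committed behavior (`create_sync` is a hypothetical name):

```python
import asyncio

def create_sync(**request_kwargs):
    # Hypothetical sync shim: run the async Anthropic messages handler
    # to completion. This fails inside an already-running event loop,
    # which is presumably why the commit defers a real implementation.
    return asyncio.run(_async_anthropic_messages(**request_kwargs))
```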


@@ -0,0 +1,116 @@
## Use LLM API endpoints in the Anthropic Interface
Note: this module is named `anthropic_interface` because `anthropic` is an existing Python package, and reusing that name was failing mypy type checking.
## Usage
---
### LiteLLM Python SDK
#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm

response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```
Example response:
```json
{
  "content": [
    {
      "text": "Hi! this is a very short joke",
      "type": "text"
    }
  ],
  "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
  "model": "claude-3-7-sonnet-20250219",
  "role": "assistant",
  "stop_reason": "end_turn",
  "stop_sequence": null,
  "type": "message",
  "usage": {
    "input_tokens": 2095,
    "output_tokens": 503,
    "cache_creation_input_tokens": 2095,
    "cache_read_input_tokens": 0
  }
}
```
#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm

response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
    stream=True,
)

async for chunk in response:
    print(chunk)
```
### LiteLLM Proxy Server
1. Set up config.yaml
```yaml
model_list:
  - model_name: anthropic-claude
    litellm_params:
      model: claude-3-7-sonnet-latest
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
<Tabs>
<TabItem label="Anthropic Python SDK" value="python">
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic

# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

response = client.messages.create(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    model="anthropic-claude",  # the model_name configured in config.yaml
    max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H "x-api-key: $LITELLM_API_KEY" \
-H 'anthropic-version: 2023-06-01' \
-d '{
  "model": "anthropic-claude",
  "messages": [
    {
      "role": "user",
      "content": "Hello, can you tell me a short joke?"
    }
  ],
  "max_tokens": 100
}'
```
</TabItem>
</Tabs>


@@ -6,7 +6,7 @@
 """
 import json
-from typing import Any, AsyncIterator, Dict, Optional, Union, cast
+from typing import AsyncIterator, Dict, List, Optional, Union, cast
 import httpx
@@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     get_async_httpx_client,
 )
+from litellm.types.llms.anthropic_messages.anthropic_response import (
+    AnthropicMessagesResponse,
+)
 from litellm.types.router import GenericLiteLLMParams
 from litellm.types.utils import ProviderSpecificHeader
 from litellm.utils import ProviderConfigManager, client
@@ -60,14 +63,25 @@ class AnthropicMessagesHandler:
 @client
 async def anthropic_messages(
-    api_key: str,
+    max_tokens: int,
+    messages: List[Dict],
     model: str,
-    stream: bool = False,
+    metadata: Optional[Dict] = None,
+    stop_sequences: Optional[List[str]] = None,
+    stream: Optional[bool] = False,
+    system: Optional[str] = None,
+    temperature: Optional[float] = None,
+    thinking: Optional[Dict] = None,
+    tool_choice: Optional[Dict] = None,
+    tools: Optional[List[Dict]] = None,
+    top_k: Optional[int] = None,
+    top_p: Optional[float] = None,
+    api_key: Optional[str] = None,
     api_base: Optional[str] = None,
     client: Optional[AsyncHTTPHandler] = None,
     custom_llm_provider: Optional[str] = None,
     **kwargs,
-) -> Union[Dict[str, Any], AsyncIterator]:
+) -> Union[AnthropicMessagesResponse, AsyncIterator]:
     """
     Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec
     """
@@ -129,10 +143,8 @@
         },
         custom_llm_provider=_custom_llm_provider,
     )
-    litellm_logging_obj.model_call_details.update(kwargs)
     # Prepare request body
-    request_body = kwargs.copy()
+    request_body = locals().copy()
     request_body = {
         k: v
         for k, v in request_body.items()
@@ -140,10 +152,12 @@
         if k
         in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
             model=model
         )
+        and v is not None
     }
+    request_body["stream"] = stream
+    request_body["model"] = model
     litellm_logging_obj.stream = stream
+    litellm_logging_obj.model_call_details.update(request_body)
     # Make the request
     request_url = anthropic_messages_provider_config.get_complete_url(
@@ -164,7 +178,7 @@
         url=request_url,
         headers=headers,
         data=json.dumps(request_body),
-        stream=stream,
+        stream=stream or False,
     )
     response.raise_for_status()
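
The net effect of these hunks: the request body is now built from the function's named parameters (`locals().copy()`) rather than from `**kwargs`, filtered down to the provider's supported params and stripped of `None` values. A standalone sketch of the same pattern, with an illustrative `SUPPORTED_PARAMS` set standing in for `get_supported_anthropic_messages_params()`:

```python
from typing import Any, Dict, List, Optional

# Illustrative stand-in for the provider config's supported-params lookup
SUPPORTED_PARAMS = {"model", "max_tokens", "messages", "temperature"}

def build_request_body(
    model: str,
    max_tokens: int,
    messages: List[Dict[str, Any]],
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
) -> Dict[str, Any]:
    # Snapshot the named arguments first; calling locals() inside the
    # comprehension would see the comprehension's own scope instead.
    snapshot = dict(locals())
    return {
        k: v
        for k, v in snapshot.items()
        if k in SUPPORTED_PARAMS and v is not None
    }

# temperature is None and top_p is unsupported here, so both are dropped
print(build_request_body("claude-3-haiku", 100, [{"role": "user", "content": "hi"}]))
```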


@@ -0,0 +1,83 @@
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union

from typing_extensions import TypeAlias


class AnthropicResponseTextBlock(TypedDict, total=False):
    """
    Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
    """

    citations: Optional[List[Dict[str, Any]]]
    text: str
    type: Literal["text"]


class AnthropicResponseToolUseBlock(TypedDict, total=False):
    """
    Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
    """

    id: Optional[str]
    input: Optional[str]
    name: Optional[str]
    type: Literal["tool_use"]


class AnthropicResponseThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
    """

    signature: Optional[str]
    thinking: Optional[str]
    type: Literal["thinking"]


class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
    """

    data: Optional[str]
    type: Literal["redacted_thinking"]


AnthropicResponseContentBlock: TypeAlias = Union[
    AnthropicResponseTextBlock,
    AnthropicResponseToolUseBlock,
    AnthropicResponseThinkingBlock,
    AnthropicResponseRedactedThinkingBlock,
]


class AnthropicUsage(TypedDict, total=False):
    """
    Input and output tokens used in the request
    """

    input_tokens: int
    output_tokens: int

    """
    Cache Tokens Used
    """
    cache_creation_input_tokens: int
    cache_read_input_tokens: int


class AnthropicMessagesResponse(TypedDict, total=False):
    """
    Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
    """

    content: Optional[List[AnthropicResponseContentBlock]]
    id: str
    model: Optional[str]  # This represents the Model type from Anthropic
    role: Optional[Literal["assistant"]]
    stop_reason: Optional[
        Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
    ]
    stop_sequence: Optional[str]
    type: Optional[Literal["message"]]
    usage: Optional[AnthropicUsage]
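
Since these are `total=False` TypedDicts, every field is optional at construction time and instances are plain dicts at runtime. A minimal consumption sketch (field values are made up):

```python
from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)

response: AnthropicMessagesResponse = {
    "id": "msg_123",
    "role": "assistant",
    "type": "message",
    "stop_reason": "end_turn",
    "content": [{"type": "text", "text": "Hi there", "citations": None}],
}

# TypedDicts are ordinary dicts at runtime, so access is dict-style
for block in response.get("content") or []:
    if block.get("type") == "text":
        print(block["text"])
```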