Merge pull request #9567 from BerriAI/litellm_anthropic_messages_improvements

[Refactor] - Expose litellm.messages.acreate() and litellm.messages.create() to make LLM API calls in the Anthropic API spec
Ishaan Jaff 2025-03-31 20:50:30 -07:00 committed by GitHub
commit bc5cc51b9d
9 changed files with 593 additions and 44 deletions


@@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token
 ### ADAPTERS ###
 from .types.adapter import AdapterItem
+import litellm.anthropic_interface as anthropic
 adapters: List[AdapterItem] = []
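
This one-line import is what exposes the new interface at the top level of the package: `litellm.anthropic` becomes an alias for `litellm.anthropic_interface`. A minimal sanity-check sketch, assuming a litellm install that includes this change:

```python
import litellm

# litellm.anthropic is the alias created by the import above, so the
# messages helpers are reachable without importing the package directly
print(litellm.anthropic.messages.acreate)
print(litellm.anthropic.messages.create)
```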


@@ -0,0 +1,6 @@
"""
Anthropic module for LiteLLM
"""
from .messages import acreate, create
__all__ = ["acreate", "create"]


@@ -0,0 +1,117 @@
"""
Interface for Anthropic's messages API

Use this to call LLMs in Anthropic /messages Request/Response format

This is an __init__.py file to allow the following interface

- litellm.messages.acreate
- litellm.messages.create
"""

from typing import AsyncIterator, Dict, Iterator, List, Optional, Union

from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
    anthropic_messages as _async_anthropic_messages,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)


async def acreate(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = 1.0,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    **kwargs,
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
    """
    Async wrapper for Anthropic's messages API

    Args:
        max_tokens (int): Maximum tokens to generate (required)
        messages (List[Dict]): List of message objects with role and content (required)
        model (str): Model name to use (required)
        metadata (Dict, optional): Request metadata
        stop_sequences (List[str], optional): Custom stop sequences
        stream (bool, optional): Whether to stream the response
        system (str, optional): System prompt
        temperature (float, optional): Sampling temperature (0.0 to 1.0)
        thinking (Dict, optional): Extended thinking configuration
        tool_choice (Dict, optional): Tool choice configuration
        tools (List[Dict], optional): List of tool definitions
        top_k (int, optional): Top K sampling parameter
        top_p (float, optional): Nucleus sampling parameter
        **kwargs: Additional arguments

    Returns:
        AnthropicMessagesResponse: Response from the API, or an AsyncIterator of chunks when streaming
    """
    return await _async_anthropic_messages(
        max_tokens=max_tokens,
        messages=messages,
        model=model,
        metadata=metadata,
        stop_sequences=stop_sequences,
        stream=stream,
        system=system,
        temperature=temperature,
        thinking=thinking,
        tool_choice=tool_choice,
        tools=tools,
        top_k=top_k,
        top_p=top_p,
        **kwargs,
    )


def create(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = 1.0,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    **kwargs,
) -> Union[AnthropicMessagesResponse, Iterator]:
    """
    Synchronous wrapper for Anthropic's messages API

    Args:
        max_tokens (int): Maximum tokens to generate (required)
        messages (List[Dict]): List of message objects with role and content (required)
        model (str): Model name to use (required)
        metadata (Dict, optional): Request metadata
        stop_sequences (List[str], optional): Custom stop sequences
        stream (bool, optional): Whether to stream the response
        system (str, optional): System prompt
        temperature (float, optional): Sampling temperature (0.0 to 1.0)
        thinking (Dict, optional): Extended thinking configuration
        tool_choice (Dict, optional): Tool choice configuration
        tools (List[Dict], optional): List of tool definitions
        top_k (int, optional): Top K sampling parameter
        top_p (float, optional): Nucleus sampling parameter
        **kwargs: Additional arguments

    Returns:
        AnthropicMessagesResponse: Response from the API, or an Iterator of chunks when streaming
    """
    raise NotImplementedError("This function is not implemented")
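
`create` is left unimplemented in this commit. If a synchronous path were needed, one option is a thin shim that drives the async handler to completion; this is only a sketch under the assumption that no event loop is already running, not the committed behavior (`create_sync` is a hypothetical name):

```python
import asyncio

def create_sync(**request_kwargs):
    # Hypothetical sync shim: run the async Anthropic messages handler
    # to completion. This fails inside an already-running event loop,
    # which is presumably why the commit defers a real implementation.
    return asyncio.run(_async_anthropic_messages(**request_kwargs))
```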


@@ -0,0 +1,116 @@
## Use LLM API endpoints in the Anthropic Interface
Note: this module is named `anthropic_interface` because `anthropic` is an existing Python package, and reusing that name was failing mypy type checking.
## Usage
---
### LiteLLM Python SDK
#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm

response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```
Example response:
```json
{
  "content": [
    {
      "text": "Hi! this is a very short joke",
      "type": "text"
    }
  ],
  "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
  "model": "claude-3-7-sonnet-20250219",
  "role": "assistant",
  "stop_reason": "end_turn",
  "stop_sequence": null,
  "type": "message",
  "usage": {
    "input_tokens": 2095,
    "output_tokens": 503,
    "cache_creation_input_tokens": 2095,
    "cache_read_input_tokens": 0
  }
}
```
#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm

response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
    stream=True,
)

async for chunk in response:
    print(chunk)
```
### LiteLLM Proxy Server
1. Set up config.yaml
```yaml
model_list:
  - model_name: anthropic-claude
    litellm_params:
      model: claude-3-7-sonnet-latest
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
<Tabs>
<TabItem label="Anthropic Python SDK" value="python">
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic

# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

response = client.messages.create(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    model="anthropic-claude",  # the model_name configured in config.yaml
    max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H "x-api-key: $LITELLM_API_KEY" \
-H 'anthropic-version: 2023-06-01' \
-d '{
  "model": "anthropic-claude",
  "messages": [
    {
      "role": "user",
      "content": "Hello, can you tell me a short joke?"
    }
  ],
  "max_tokens": 100
}'
```
</TabItem>
</Tabs>


@@ -6,7 +6,7 @@
 """
 import json
-from typing import Any, AsyncIterator, Dict, Optional, Union, cast
+from typing import AsyncIterator, Dict, List, Optional, Union, cast
 import httpx
@@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     get_async_httpx_client,
 )
+from litellm.types.llms.anthropic_messages.anthropic_response import (
+    AnthropicMessagesResponse,
+)
 from litellm.types.router import GenericLiteLLMParams
 from litellm.types.utils import ProviderSpecificHeader
 from litellm.utils import ProviderConfigManager, client
@@ -60,14 +63,25 @@ class AnthropicMessagesHandler:
 @client
 async def anthropic_messages(
-    api_key: str,
+    max_tokens: int,
+    messages: List[Dict],
     model: str,
-    stream: bool = False,
+    metadata: Optional[Dict] = None,
+    stop_sequences: Optional[List[str]] = None,
+    stream: Optional[bool] = False,
+    system: Optional[str] = None,
+    temperature: Optional[float] = None,
+    thinking: Optional[Dict] = None,
+    tool_choice: Optional[Dict] = None,
+    tools: Optional[List[Dict]] = None,
+    top_k: Optional[int] = None,
+    top_p: Optional[float] = None,
+    api_key: Optional[str] = None,
     api_base: Optional[str] = None,
     client: Optional[AsyncHTTPHandler] = None,
     custom_llm_provider: Optional[str] = None,
     **kwargs,
-) -> Union[Dict[str, Any], AsyncIterator]:
+) -> Union[AnthropicMessagesResponse, AsyncIterator]:
     """
     Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec
     """
@@ -129,10 +143,8 @@
         },
         custom_llm_provider=_custom_llm_provider,
     )
-    litellm_logging_obj.model_call_details.update(kwargs)
     # Prepare request body
-    request_body = kwargs.copy()
+    request_body = locals().copy()
     request_body = {
         k: v
         for k, v in request_body.items()
@@ -140,10 +152,12 @@
         if k
         in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
             model=model
         )
+        and v is not None
     }
+    request_body["stream"] = stream
+    request_body["model"] = model
     litellm_logging_obj.stream = stream
+    litellm_logging_obj.model_call_details.update(request_body)
     # Make the request
     request_url = anthropic_messages_provider_config.get_complete_url(
@@ -164,7 +178,7 @@
         url=request_url,
         headers=headers,
         data=json.dumps(request_body),
-        stream=stream,
+        stream=stream or False,
     )
     response.raise_for_status()
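
The net effect of these hunks: the request body is now built from the function's named parameters (`locals().copy()`) rather than from `**kwargs`, filtered down to the provider's supported params and stripped of `None` values. A standalone sketch of the same pattern, with an illustrative `SUPPORTED_PARAMS` set standing in for `get_supported_anthropic_messages_params()`:

```python
from typing import Any, Dict, List, Optional

# Illustrative stand-in for the provider config's supported-params lookup
SUPPORTED_PARAMS = {"model", "max_tokens", "messages", "temperature"}

def build_request_body(
    model: str,
    max_tokens: int,
    messages: List[Dict[str, Any]],
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
) -> Dict[str, Any]:
    # Snapshot the named arguments first; calling locals() inside the
    # comprehension would see the comprehension's own scope instead.
    snapshot = dict(locals())
    return {
        k: v
        for k, v in snapshot.items()
        if k in SUPPORTED_PARAMS and v is not None
    }

# temperature is None and top_p is unsupported here, so both are dropped
print(build_request_body("claude-3-haiku", 100, [{"role": "user", "content": "hi"}]))
```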


@@ -0,0 +1,83 @@
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union

from typing_extensions import TypeAlias


class AnthropicResponseTextBlock(TypedDict, total=False):
    """
    Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
    """

    citations: Optional[List[Dict[str, Any]]]
    text: str
    type: Literal["text"]


class AnthropicResponseToolUseBlock(TypedDict, total=False):
    """
    Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
    """

    id: Optional[str]
    input: Optional[str]
    name: Optional[str]
    type: Literal["tool_use"]


class AnthropicResponseThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
    """

    signature: Optional[str]
    thinking: Optional[str]
    type: Literal["thinking"]


class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
    """

    data: Optional[str]
    type: Literal["redacted_thinking"]


AnthropicResponseContentBlock: TypeAlias = Union[
    AnthropicResponseTextBlock,
    AnthropicResponseToolUseBlock,
    AnthropicResponseThinkingBlock,
    AnthropicResponseRedactedThinkingBlock,
]


class AnthropicUsage(TypedDict, total=False):
    """
    Input and output tokens used in the request
    """

    input_tokens: int
    output_tokens: int

    """
    Cache Tokens Used
    """
    cache_creation_input_tokens: int
    cache_read_input_tokens: int


class AnthropicMessagesResponse(TypedDict, total=False):
    """
    Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
    """

    content: Optional[List[AnthropicResponseContentBlock]]
    id: str
    model: Optional[str]  # This represents the Model type from Anthropic
    role: Optional[Literal["assistant"]]
    stop_reason: Optional[
        Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
    ]
    stop_sequence: Optional[str]
    type: Optional[Literal["message"]]
    usage: Optional[AnthropicUsage]
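
Since these are `total=False` TypedDicts, every field is optional at construction time and instances are plain dicts at runtime. A minimal consumption sketch (field values are made up):

```python
from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)

response: AnthropicMessagesResponse = {
    "id": "msg_123",
    "role": "assistant",
    "type": "message",
    "stop_reason": "end_turn",
    "content": [{"type": "text", "text": "Hi there", "citations": None}],
}

# TypedDicts are ordinary dicts at runtime, so access is dict-style
for block in response.get("content") or []:
    if block.get("type") == "text":
        print(block["text"])
```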