mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
Merge pull request #9567 from BerriAI/litellm_anthropic_messages_improvements
[Refactor] - Expose litellm.messages.acreate() and litellm.messages.create() to make LLM API calls in Anthropic API spec
This commit is contained in:
commit
bc5cc51b9d
9 changed files with 593 additions and 44 deletions
|
@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token
|
|||
|
||||
### ADAPTERS ###
|
||||
from .types.adapter import AdapterItem
|
||||
import litellm.anthropic_interface as anthropic
|
||||
|
||||
adapters: List[AdapterItem] = []
|
||||
|
||||
|
|
6
litellm/anthropic_interface/__init__.py
Normal file
6
litellm/anthropic_interface/__init__.py
Normal file
|
@ -0,0 +1,6 @@
|
|||
"""
|
||||
Anthropic module for LiteLLM
|
||||
"""
|
||||
from .messages import acreate, create
|
||||
|
||||
__all__ = ["acreate", "create"]
|
117
litellm/anthropic_interface/messages/__init__.py
Normal file
117
litellm/anthropic_interface/messages/__init__.py
Normal file
|
@ -0,0 +1,117 @@
|
|||
"""
|
||||
Interface for Anthropic's messages API
|
||||
|
||||
Use this to call LLMs in Anthropic /messages Request/Response format
|
||||
|
||||
This is an __init__.py file to allow the following interface
|
||||
|
||||
- litellm.anthropic.messages.acreate
|
||||
- litellm.anthropic.messages.create
|
||||
|
||||
"""
|
||||
|
||||
from typing import AsyncIterator, Dict, Iterator, List, Optional, Union
|
||||
|
||||
from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
|
||||
anthropic_messages as _async_anthropic_messages,
|
||||
)
|
||||
from litellm.types.llms.anthropic_messages.anthropic_response import (
|
||||
AnthropicMessagesResponse,
|
||||
)
|
||||
|
||||
|
||||
async def acreate(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = 1.0,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    **kwargs
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
    """
    Call an LLM asynchronously using the Anthropic /messages request format.

    This is a thin pass-through: every named argument, plus anything supplied
    in ``**kwargs``, is forwarded unchanged as a keyword argument to the
    underlying async ``anthropic_messages`` handler, and the handler's result
    is returned as-is.

    Args:
        max_tokens (int): Maximum tokens to generate (required)
        messages (List[Dict]): Message objects with role and content (required)
        model (str): Model name to use (required)
        metadata (Dict, optional): Request metadata
        stop_sequences (List[str], optional): Custom stop sequences
        stream (bool, optional): Whether to stream the response. Defaults to False.
        system (str, optional): System prompt
        temperature (float, optional): Sampling temperature (0.0 to 1.0).
            Defaults to 1.0, so a temperature is always forwarded unless the
            caller explicitly passes None.
        thinking (Dict, optional): Extended thinking configuration
        tool_choice (Dict, optional): Tool choice configuration
        tools (List[Dict], optional): List of tool definitions
        top_k (int, optional): Top K sampling parameter
        top_p (float, optional): Nucleus sampling parameter
        **kwargs: Additional keyword arguments handed to the handler untouched

    Returns:
        Union[AnthropicMessagesResponse, AsyncIterator]: Whatever the
        underlying handler returns for this request.
    """
    # Gather the Anthropic-spec parameters first so the forwarding call stays
    # a single line; insertion order matches the handler call it replaces.
    anthropic_request = {
        "max_tokens": max_tokens,
        "messages": messages,
        "model": model,
        "metadata": metadata,
        "stop_sequences": stop_sequences,
        "stream": stream,
        "system": system,
        "temperature": temperature,
        "thinking": thinking,
        "tool_choice": tool_choice,
        "tools": tools,
        "top_k": top_k,
        "top_p": top_p,
    }
    return await _async_anthropic_messages(**anthropic_request, **kwargs)
|
||||
|
||||
|
||||
async def create(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = 1.0,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    **kwargs
) -> Union[AnthropicMessagesResponse, Iterator]:
    """
    Placeholder for the `create` entry point of the Anthropic messages API.

    Not implemented yet: the body unconditionally raises NotImplementedError
    and none of the arguments are used. The signature mirrors `acreate`.

    NOTE(review): this is declared `async def`, so the error only surfaces
    when the returned coroutine is awaited, yet the return annotation uses
    the synchronous `Iterator`. Presumably this is meant to become the
    synchronous counterpart of `acreate` - confirm before implementing.

    Args:
        max_tokens (int): Maximum tokens to generate (required)
        messages (List[Dict]): List of message objects with role and content (required)
        model (str): Model name to use (required)
        metadata (Dict, optional): Request metadata
        stop_sequences (List[str], optional): Custom stop sequences
        stream (bool, optional): Whether to stream the response
        system (str, optional): System prompt
        temperature (float, optional): Sampling temperature (0.0 to 1.0)
        thinking (Dict, optional): Extended thinking configuration
        tool_choice (Dict, optional): Tool choice configuration
        tools (List[Dict], optional): List of tool definitions
        top_k (int, optional): Top K sampling parameter
        top_p (float, optional): Nucleus sampling parameter
        **kwargs: Additional arguments

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("This function is not implemented")
|
116
litellm/anthropic_interface/readme.md
Normal file
116
litellm/anthropic_interface/readme.md
Normal file
|
@ -0,0 +1,116 @@
|
|||
## Use LLM API endpoints in Anthropic Interface
|
||||
|
||||
Note: This module is named `anthropic_interface` rather than `anthropic` because `anthropic` is already the name of a well-known Python package, and reusing that name caused mypy type checking to fail.
|
||||
|
||||
|
||||
## Usage
|
||||
---
|
||||
|
||||
### LiteLLM Python SDK
|
||||
|
||||
#### Non-streaming example
|
||||
```python showLineNumbers title="Example using LiteLLM Python SDK"
|
||||
import litellm
|
||||
response = await litellm.anthropic.messages.acreate(
|
||||
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||
api_key=api_key,
|
||||
model="anthropic/claude-3-haiku-20240307",
|
||||
max_tokens=100,
|
||||
)
|
||||
```
|
||||
|
||||
Example response:
|
||||
```json
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "Hi! this is a very short joke",
|
||||
"type": "text"
|
||||
}
|
||||
],
|
||||
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
|
||||
"model": "claude-3-haiku-20240307",
|
||||
"role": "assistant",
|
||||
"stop_reason": "end_turn",
|
||||
"stop_sequence": null,
|
||||
"type": "message",
|
||||
"usage": {
|
||||
"input_tokens": 2095,
|
||||
"output_tokens": 503,
|
||||
"cache_creation_input_tokens": 2095,
|
||||
"cache_read_input_tokens": 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Streaming example
|
||||
```python showLineNumbers title="Example using LiteLLM Python SDK"
|
||||
import litellm
|
||||
response = await litellm.anthropic.messages.acreate(
|
||||
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||
api_key=api_key,
|
||||
model="anthropic/claude-3-haiku-20240307",
|
||||
max_tokens=100,
|
||||
stream=True,
|
||||
)
|
||||
async for chunk in response:
|
||||
print(chunk)
|
||||
```
|
||||
|
||||
### LiteLLM Proxy Server
|
||||
|
||||
|
||||
1. Setup config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: anthropic-claude
|
||||
litellm_params:
|
||||
model: claude-3-7-sonnet-latest
|
||||
```
|
||||
|
||||
2. Start proxy
|
||||
|
||||
```bash
|
||||
litellm --config /path/to/config.yaml
|
||||
```
|
||||
|
||||
3. Test it!
|
||||
|
||||
<Tabs>
|
||||
<TabItem label="Anthropic Python SDK" value="python">
|
||||
|
||||
```python showLineNumbers title="Example using LiteLLM Proxy Server"
|
||||
import anthropic
|
||||
|
||||
# point anthropic sdk to litellm proxy
|
||||
client = anthropic.Anthropic(
|
||||
base_url="http://0.0.0.0:4000",
|
||||
api_key="sk-1234",
|
||||
)
|
||||
|
||||
response = client.messages.create(
|
||||
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||
model="anthropic-claude",
|
||||
max_tokens=100,
|
||||
)
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem label="curl" value="curl">
|
||||
|
||||
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
|
||||
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
|
||||
-H 'content-type: application/json' \
|
||||
-H "x-api-key: $LITELLM_API_KEY" \
|
||||
-H 'anthropic-version: 2023-06-01' \
|
||||
-d '{
|
||||
"model": "anthropic-claude",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, can you tell me a short joke?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 100
|
||||
}'
|
||||
```

</TabItem>
</Tabs>
|
|
@ -6,7 +6,7 @@
|
|||
"""
|
||||
|
||||
import json
|
||||
from typing import Any, AsyncIterator, Dict, Optional, Union, cast
|
||||
from typing import AsyncIterator, Dict, List, Optional, Union, cast
|
||||
|
||||
import httpx
|
||||
|
||||
|
@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
|
|||
AsyncHTTPHandler,
|
||||
get_async_httpx_client,
|
||||
)
|
||||
from litellm.types.llms.anthropic_messages.anthropic_response import (
|
||||
AnthropicMessagesResponse,
|
||||
)
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
from litellm.types.utils import ProviderSpecificHeader
|
||||
from litellm.utils import ProviderConfigManager, client
|
||||
|
@ -60,14 +63,25 @@ class AnthropicMessagesHandler:
|
|||
|
||||
@client
|
||||
async def anthropic_messages(
|
||||
api_key: str,
|
||||
max_tokens: int,
|
||||
messages: List[Dict],
|
||||
model: str,
|
||||
stream: bool = False,
|
||||
metadata: Optional[Dict] = None,
|
||||
stop_sequences: Optional[List[str]] = None,
|
||||
stream: Optional[bool] = False,
|
||||
system: Optional[str] = None,
|
||||
temperature: Optional[float] = None,
|
||||
thinking: Optional[Dict] = None,
|
||||
tool_choice: Optional[Dict] = None,
|
||||
tools: Optional[List[Dict]] = None,
|
||||
top_k: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
client: Optional[AsyncHTTPHandler] = None,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> Union[Dict[str, Any], AsyncIterator]:
|
||||
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
|
||||
"""
|
||||
Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec
|
||||
"""
|
||||
|
@ -129,10 +143,8 @@ async def anthropic_messages(
|
|||
},
|
||||
custom_llm_provider=_custom_llm_provider,
|
||||
)
|
||||
litellm_logging_obj.model_call_details.update(kwargs)
|
||||
|
||||
# Prepare request body
|
||||
request_body = kwargs.copy()
|
||||
request_body = locals().copy()
|
||||
request_body = {
|
||||
k: v
|
||||
for k, v in request_body.items()
|
||||
|
@ -140,10 +152,12 @@ async def anthropic_messages(
|
|||
in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
|
||||
model=model
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
request_body["stream"] = stream
|
||||
request_body["model"] = model
|
||||
litellm_logging_obj.stream = stream
|
||||
litellm_logging_obj.model_call_details.update(request_body)
|
||||
|
||||
# Make the request
|
||||
request_url = anthropic_messages_provider_config.get_complete_url(
|
||||
|
@ -164,7 +178,7 @@ async def anthropic_messages(
|
|||
url=request_url,
|
||||
headers=headers,
|
||||
data=json.dumps(request_body),
|
||||
stream=stream,
|
||||
stream=stream or False,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
|
|
83
litellm/types/llms/anthropic_messages/anthropic_response.py
Normal file
83
litellm/types/llms/anthropic_messages/anthropic_response.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
|
||||
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
|
||||
class AnthropicResponseTextBlock(TypedDict, total=False):
    """
    Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages

    Content block whose `type` is "text". Declared with total=False, so
    every key may be absent from a given dict.
    """

    # Optional list of citation objects; may be None.
    citations: Optional[List[Dict[str, Any]]]
    # The text carried by this block.
    text: str
    # Discriminator for this block kind; always "text".
    type: Literal["text"]
|
||||
|
||||
|
||||
class AnthropicResponseToolUseBlock(TypedDict, total=False):
    """
    Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages

    Content block whose `type` is "tool_use". Declared with total=False, so
    every key may be absent from a given dict.
    """

    # Identifier of this tool call.
    id: Optional[str]
    # Arguments for the tool call. The Messages API returns these as a JSON
    # object, so the value is a dict rather than a string.
    input: Optional[Dict[str, Any]]
    # Name of the tool being called.
    name: Optional[str]
    # Discriminator for this block kind; always "tool_use".
    type: Literal["tool_use"]
|
||||
|
||||
|
||||
class AnthropicResponseThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages

    Content block whose `type` is "thinking". Declared with total=False, so
    every key may be absent from a given dict.
    """

    # Signature string attached to the thinking block; may be None.
    signature: Optional[str]
    # The thinking text; may be None.
    thinking: Optional[str]
    # Discriminator for this block kind; always "thinking".
    type: Literal["thinking"]
|
||||
|
||||
|
||||
class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages

    Content block whose `type` is "redacted_thinking". Declared with
    total=False, so every key may be absent from a given dict.
    """

    # Payload of the redacted block, carried as a string; may be None.
    data: Optional[str]
    # Discriminator for this block kind; always "redacted_thinking".
    type: Literal["redacted_thinking"]
|
||||
|
||||
|
||||
# Any one of the response content-block shapes; each member carries a
# distinct `type` literal ("text", "tool_use", "thinking",
# "redacted_thinking") that tells them apart.
AnthropicResponseContentBlock: TypeAlias = Union[
    AnthropicResponseTextBlock,
    AnthropicResponseToolUseBlock,
    AnthropicResponseThinkingBlock,
    AnthropicResponseRedactedThinkingBlock,
]
|
||||
|
||||
|
||||
class AnthropicUsage(TypedDict, total=False):
    """
    Token usage reported for a request.

    Declared with total=False, so every key may be absent from a given dict.
    """

    # Input and output tokens used in the request
    input_tokens: int
    output_tokens: int

    # Cache tokens used
    cache_creation_input_tokens: int
    cache_read_input_tokens: int
|
||||
|
||||
|
||||
class AnthropicMessagesResponse(TypedDict, total=False):
    """
    Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages

    Top-level shape of a /messages response. Declared with total=False, so
    every key may be absent from a given dict.
    """

    # Content blocks of the message (text, tool_use, thinking, redacted_thinking).
    content: Optional[List[AnthropicResponseContentBlock]]
    # Identifier of the message.
    id: str
    model: Optional[str]  # This represents the Model type from Anthropic
    # Only "assistant" is allowed by this type.
    role: Optional[Literal["assistant"]]
    # Why generation stopped; None is permitted by the type.
    stop_reason: Optional[
        Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
    ]
    # Presumably the stop sequence that was matched, if any - TODO confirm.
    stop_sequence: Optional[str]
    # Only "message" is allowed by this type.
    type: Optional[Literal["message"]]
    # Token accounting for the request.
    usage: Optional[AnthropicUsage]
|
Loading…
Add table
Add a link
Reference in a new issue