Merge pull request #9567 from BerriAI/litellm_anthropic_messages_improvements
[Refactor] - Expose litellm.messages.acreate() and litellm.messages.create() to make LLM API calls in Anthropic API spec
Commit: bc5cc51b9d
9 changed files with 593 additions and 44 deletions

@@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem';

# /v1/messages [BETA]

LiteLLM provides a BETA endpoint that follows the spec of Anthropic's `/v1/messages` endpoint.

Use LiteLLM to call all your LLM APIs in the Anthropic `v1/messages` format.

This endpoint currently supports only the Anthropic API.

## Overview

| Feature | Supported | Notes |
|---------|-----------|-------|

@@ -21,9 +22,61 @@ Planned improvement:

- Bedrock Anthropic support

## Usage
---

### LiteLLM Python SDK

#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
import os

# assumes ANTHROPIC_API_KEY is set in the environment
api_key = os.getenv("ANTHROPIC_API_KEY")

# call from within an async function
response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```

Example response:
```json
{
  "content": [
    {
      "text": "Hi! this is a very short joke",
      "type": "text"
    }
  ],
  "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
  "model": "claude-3-7-sonnet-20250219",
  "role": "assistant",
  "stop_reason": "end_turn",
  "stop_sequence": null,
  "type": "message",
  "usage": {
    "input_tokens": 2095,
    "output_tokens": 503,
    "cache_creation_input_tokens": 2095,
    "cache_read_input_tokens": 0
  }
}
```
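
The response is a plain dict in the Anthropic format, so the generated text can be pulled out directly; a minimal sketch, assuming the non-streaming shape shown above:

```python showLineNumbers title="Sketch: extracting text from the response"
# Minimal sketch, assuming the non-streaming response dict shown above.
text = "".join(
    block["text"]
    for block in response.get("content", [])
    if block.get("type") == "text"
)
print(text)
```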

#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
import os

# assumes ANTHROPIC_API_KEY is set in the environment
api_key = os.getenv("ANTHROPIC_API_KEY")

# call from within an async function
response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
    stream=True,
)
async for chunk in response:
    print(chunk)
```
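
What each chunk looks like depends on the provider stream; if the chunks arrive as Anthropic-style event dicts (an assumption worth verifying by printing one first, as above), the streamed text can be accumulated like this:

```python showLineNumbers title="Sketch: accumulating streamed text"
# Assumption: chunks are Anthropic-style SSE event dicts, where text arrives
# as `content_block_delta` events carrying a `text_delta` payload.
full_text = ""
async for chunk in response:
    if chunk.get("type") == "content_block_delta":
        delta = chunk.get("delta", {})
        if delta.get("type") == "text_delta":
            full_text += delta.get("text", "")
print(full_text)
```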

### LiteLLM Proxy Server

<Tabs>
<TabItem label="PROXY" value="proxy">

1. Setup config.yaml

@@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml

3. Test it!

<Tabs>
<TabItem label="Anthropic Python SDK" value="python">

```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic

# point the Anthropic SDK at the LiteLLM proxy
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

response = client.messages.create(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">

```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H "x-api-key: $LITELLM_API_KEY" \
@@ -52,41 +126,176 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
  "messages": [
    {
      "role": "user",
      "content": "Hello, can you tell me a short joke?"
    }
  ],
  "max_tokens": 100
}'
```
</TabItem>
</Tabs>

</TabItem>
</Tabs>
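
Streaming through the proxy also works with the Anthropic SDK's native streaming helper; a sketch, assuming the proxy setup above:

```python showLineNumbers title="Sketch: streaming via the proxy with the Anthropic SDK"
import anthropic

client = anthropic.Anthropic(base_url="http://0.0.0.0:4000", api_key="sk-1234")

# The SDK's streaming helper yields text deltas as they arrive.
with client.messages.stream(
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
    messages=[{"role": "user", "content": "Tell me a short joke."}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
```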

## Request Format
---

The request body will be in the Anthropic messages API format. **litellm follows the Anthropic messages specification for this endpoint.**

#### Example request body

```json
{
  "model": "claude-3-7-sonnet-20250219",
  "max_tokens": 1024,
  "messages": [
    {
      "role": "user",
      "content": "Hello, world"
    }
  ]
}
```

#### Required Fields
- **model** (string):
  The model identifier (e.g., `"claude-3-7-sonnet-20250219"`).
- **max_tokens** (integer):
  The maximum number of tokens to generate before stopping.
  _Note: The model may stop before reaching this limit; the value must be greater than 1._
- **messages** (array of objects):
  An ordered list of conversational turns (see the multi-turn sketch after this list). Each message object must include:
  - **role** (enum: `"user"` or `"assistant"`):
    Specifies the speaker of the message.
  - **content** (string or array of content blocks):
    The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message.

    _Example equivalence:_
    ```json
    {"role": "user", "content": "Hello, Claude"}
    ```
    is equivalent to:
    ```json
    {"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]}
    ```
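
For multi-turn conversations, `user` and `assistant` turns alternate in order, with the final turn being the one the model answers; a short sketch:

```python showLineNumbers title="Sketch: a multi-turn messages array"
# Turns alternate between "user" and "assistant"; the model answers the last one.
messages = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "The capital of France is Paris."},
    {"role": "user", "content": "Roughly how many people live there?"},
]
```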

#### Optional Fields

The fields below are all optional; a sketch combining several of them follows the list.

- **metadata** (object):
  Contains additional metadata about the request (e.g., `user_id` as an opaque identifier).
- **stop_sequences** (array of strings):
  Custom sequences that, when encountered in the generated text, cause the model to stop.
- **stream** (boolean):
  Indicates whether to stream the response using server-sent events.
- **system** (string or array):
  A system prompt providing context or specific instructions to the model.
- **temperature** (number):
  Controls randomness in the model's responses. Valid range: `0 < temperature < 1`.
- **thinking** (object):
  Configuration for enabling extended thinking. If enabled, it includes:
  - **budget_tokens** (integer):
    Minimum of 1024 tokens (and less than `max_tokens`).
  - **type** (enum):
    E.g., `"enabled"`.
- **tool_choice** (object):
  Instructs how the model should utilize any provided tools.
- **tools** (array of objects):
  Definitions for tools available to the model. Each tool includes:
  - **name** (string):
    The tool's name.
  - **description** (string):
    A detailed description of the tool.
  - **input_schema** (object):
    A JSON schema describing the expected input format for the tool.
- **top_k** (integer):
  Limits sampling to the top K options.
- **top_p** (number):
  Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`.
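
A minimal sketch combining several of these optional fields through the SDK wrapper (the field values are illustrative only, not recommendations; assumes `ANTHROPIC_API_KEY` is set):

```python showLineNumbers title="Sketch: optional fields via litellm.anthropic.messages.acreate"
import litellm

# Illustrative values only; call from within an async function.
response = await litellm.anthropic.messages.acreate(
    model="anthropic/claude-3-7-sonnet-20250219",
    max_tokens=2048,
    messages=[{"role": "user", "content": "Summarize the plot of Hamlet in two sentences."}],
    system="You are a concise literary assistant.",  # system prompt
    temperature=0.5,                                 # 0 < temperature < 1
    stop_sequences=["\n\nHuman:"],                   # custom stop sequence
    metadata={"user_id": "user-123"},                # opaque request metadata
)
```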

## Response Format
---

Responses will be in the Anthropic messages API format.

#### Example Response

```json
{
  "content": [
    {
      "text": "Hi! My name is Claude.",
      "type": "text"
    }
  ],
  "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
  "model": "claude-3-7-sonnet-20250219",
  "role": "assistant",
  "stop_reason": "end_turn",
  "stop_sequence": null,
  "type": "message",
  "usage": {
    "input_tokens": 2095,
    "output_tokens": 503,
    "cache_creation_input_tokens": 2095,
    "cache_read_input_tokens": 0
  }
}
```

#### Response fields

A short sketch showing how to consume these fields follows the list.

- **content** (array of objects):
  Contains the generated content blocks from the model. Each block includes:
  - **type** (string):
    Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`).
  - **text** (string):
    The generated text from the model.
    _Note: Maximum length is 5,000,000 characters._
  - **citations** (array of objects or `null`):
    Optional field providing citation details. Each citation includes:
    - **cited_text** (string):
      The excerpt being cited.
    - **document_index** (integer):
      An index referencing the cited document.
    - **document_title** (string or `null`):
      The title of the cited document.
    - **start_char_index** (integer):
      The starting character index for the citation.
    - **end_char_index** (integer):
      The ending character index for the citation.
    - **type** (string):
      Typically `"char_location"`.

- **id** (string):
  A unique identifier for the response message.
  _Note: The format and length of IDs may change over time._

- **model** (string):
  Specifies the model that generated the response.

- **role** (string):
  Indicates the role of the generated message. For responses, this is always `"assistant"`.

- **stop_reason** (string):
  Explains why the model stopped generating text. Possible values include:
  - `"end_turn"`: The model reached a natural stopping point.
  - `"max_tokens"`: The generation stopped because the maximum token limit was reached.
  - `"stop_sequence"`: A custom stop sequence was encountered.
  - `"tool_use"`: The model invoked one or more tools.

- **stop_sequence** (string or `null`):
  Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`.

- **type** (string):
  Denotes the type of response object, which is always `"message"`.

- **usage** (object):
  Provides details on token usage for billing and rate limiting. This includes:
  - **input_tokens** (integer):
    Total number of input tokens processed.
  - **output_tokens** (integer):
    Total number of output tokens generated.
  - **cache_creation_input_tokens** (integer or `null`):
    Number of tokens used to create a cache entry.
  - **cache_read_input_tokens** (integer or `null`):
    Number of tokens read from the cache.
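
A short sketch of consuming `stop_reason` and `usage` defensively, assuming the response dict shape above (the helper name is hypothetical, not part of the SDK):

```python showLineNumbers title="Sketch: inspecting stop_reason and usage"
from typing import Dict


def summarize_response(response: Dict) -> str:
    """Hypothetical helper: report why generation stopped and what it cost."""
    stop_reason = response.get("stop_reason")
    usage = response.get("usage") or {}
    parts = [f"stopped because: {stop_reason}"]
    if stop_reason == "stop_sequence":
        parts.append(f"matched: {response.get('stop_sequence')!r}")
    parts.append(
        f"tokens: {usage.get('input_tokens', 0)} in / {usage.get('output_tokens', 0)} out"
    )
    return " | ".join(parts)
```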

litellm/__init__.py

@@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token

### ADAPTERS ###
from .types.adapter import AdapterItem
import litellm.anthropic_interface as anthropic

adapters: List[AdapterItem] = []
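
This alias is what exposes the new interface as `litellm.anthropic`; a quick sanity check (a sketch, not part of the PR):

```python
import litellm

# `litellm.anthropic` is the aliased `litellm.anthropic_interface` module.
assert litellm.anthropic.messages.acreate.__name__ == "acreate"
```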

litellm/anthropic_interface/__init__.py — new file (6 lines)

@@ -0,0 +1,6 @@
"""
|
||||
Anthropic module for LiteLLM
|
||||
"""
|
||||
from .messages import acreate, create
|
||||
|
||||
__all__ = ["acreate", "create"]
|

litellm/anthropic_interface/messages/__init__.py — new file (117 lines)

@@ -0,0 +1,117 @@
"""
|
||||
Interface for Anthropic's messages API
|
||||
|
||||
Use this to call LLMs in Anthropic /messages Request/Response format
|
||||
|
||||
This is an __init__.py file to allow the following interface
|
||||
|
||||
- litellm.messages.acreate
|
||||
- litellm.messages.create
|
||||
|
||||
"""
|
||||
|
||||
from typing import AsyncIterator, Dict, Iterator, List, Optional, Union
|
||||
|
||||
from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
|
||||
anthropic_messages as _async_anthropic_messages,
|
||||
)
|
||||
from litellm.types.llms.anthropic_messages.anthropic_response import (
|
||||
AnthropicMessagesResponse,
|
||||
)
|
||||
|
||||
|
||||
async def acreate(
|
||||
max_tokens: int,
|
||||
messages: List[Dict],
|
||||
model: str,
|
||||
metadata: Optional[Dict] = None,
|
||||
stop_sequences: Optional[List[str]] = None,
|
||||
stream: Optional[bool] = False,
|
||||
system: Optional[str] = None,
|
||||
temperature: Optional[float] = 1.0,
|
||||
thinking: Optional[Dict] = None,
|
||||
tool_choice: Optional[Dict] = None,
|
||||
tools: Optional[List[Dict]] = None,
|
||||
top_k: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
|
||||
"""
|
||||
Async wrapper for Anthropic's messages API
|
||||
|
||||
Args:
|
||||
max_tokens (int): Maximum tokens to generate (required)
|
||||
messages (List[Dict]): List of message objects with role and content (required)
|
||||
model (str): Model name to use (required)
|
||||
metadata (Dict, optional): Request metadata
|
||||
stop_sequences (List[str], optional): Custom stop sequences
|
||||
stream (bool, optional): Whether to stream the response
|
||||
system (str, optional): System prompt
|
||||
temperature (float, optional): Sampling temperature (0.0 to 1.0)
|
||||
thinking (Dict, optional): Extended thinking configuration
|
||||
tool_choice (Dict, optional): Tool choice configuration
|
||||
tools (List[Dict], optional): List of tool definitions
|
||||
top_k (int, optional): Top K sampling parameter
|
||||
top_p (float, optional): Nucleus sampling parameter
|
||||
**kwargs: Additional arguments
|
||||
|
||||
Returns:
|
||||
Dict: Response from the API
|
||||
"""
|
||||
return await _async_anthropic_messages(
|
||||
max_tokens=max_tokens,
|
||||
messages=messages,
|
||||
model=model,
|
||||
metadata=metadata,
|
||||
stop_sequences=stop_sequences,
|
||||
stream=stream,
|
||||
system=system,
|
||||
temperature=temperature,
|
||||
thinking=thinking,
|
||||
tool_choice=tool_choice,
|
||||
tools=tools,
|
||||
top_k=top_k,
|
||||
top_p=top_p,
|
||||
**kwargs,
|
||||
)


def create(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = 1.0,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    **kwargs
) -> Union[AnthropicMessagesResponse, Iterator]:
    """
    Synchronous wrapper for Anthropic's messages API (not yet implemented)

    Args:
        Same parameters as acreate above.

    Returns:
        AnthropicMessagesResponse, or an Iterator of chunks when stream=True
    """
    raise NotImplementedError("This function is not implemented")
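
Until the synchronous wrapper is implemented, synchronous callers can drive the async variant themselves; a minimal sketch (assumes no event loop is already running):

```python
import asyncio

import litellm

# Workaround sketch: run the async wrapper to completion from sync code.
response = asyncio.run(
    litellm.anthropic.messages.acreate(
        messages=[{"role": "user", "content": "Hello!"}],
        model="anthropic/claude-3-haiku-20240307",
        max_tokens=100,
    )
)
```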

litellm/anthropic_interface/readme.md — new file (116 lines)

@@ -0,0 +1,116 @@
## Use LLM API endpoints in Anthropic Interface

Note: This is called `anthropic_interface` because `anthropic` is a known Python package and was failing mypy type checking.

## Usage
---

### LiteLLM Python SDK

#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
import os

# assumes ANTHROPIC_API_KEY is set in the environment
api_key = os.getenv("ANTHROPIC_API_KEY")

# call from within an async function
response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```

Example response:
```json
{
  "content": [
    {
      "text": "Hi! this is a very short joke",
      "type": "text"
    }
  ],
  "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
  "model": "claude-3-7-sonnet-20250219",
  "role": "assistant",
  "stop_reason": "end_turn",
  "stop_sequence": null,
  "type": "message",
  "usage": {
    "input_tokens": 2095,
    "output_tokens": 503,
    "cache_creation_input_tokens": 2095,
    "cache_read_input_tokens": 0
  }
}
```

#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
import os

# assumes ANTHROPIC_API_KEY is set in the environment
api_key = os.getenv("ANTHROPIC_API_KEY")

# call from within an async function
response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
    stream=True,
)
async for chunk in response:
    print(chunk)
```

### LiteLLM Proxy Server

1. Setup config.yaml

```yaml
model_list:
  - model_name: anthropic-claude
    litellm_params:
      model: claude-3-7-sonnet-latest
      # assumption: read the key from your environment
      api_key: os.environ/ANTHROPIC_API_KEY
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

<Tabs>
<TabItem label="Anthropic Python SDK" value="python">

```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic

# point the Anthropic SDK at the LiteLLM proxy
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

response = client.messages.create(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">

```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H "x-api-key: $LITELLM_API_KEY" \
-H 'anthropic-version: 2023-06-01' \
-d '{
  "model": "anthropic-claude",
  "messages": [
    {
      "role": "user",
      "content": "Hello, can you tell me a short joke?"
    }
  ],
  "max_tokens": 100
}'
```
</TabItem>
</Tabs>

litellm/llms/anthropic/experimental_pass_through/messages/handler.py

@@ -6,7 +6,7 @@
"""

import json
from typing import AsyncIterator, Dict, List, Optional, Union, cast

import httpx

@@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
    AsyncHTTPHandler,
    get_async_httpx_client,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import ProviderSpecificHeader
from litellm.utils import ProviderConfigManager, client

@@ -60,14 +63,25 @@ class AnthropicMessagesHandler:

@client
async def anthropic_messages(
    max_tokens: int,
    messages: List[Dict],
    model: str,
    metadata: Optional[Dict] = None,
    stop_sequences: Optional[List[str]] = None,
    stream: Optional[bool] = False,
    system: Optional[str] = None,
    temperature: Optional[float] = None,
    thinking: Optional[Dict] = None,
    tool_choice: Optional[Dict] = None,
    tools: Optional[List[Dict]] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    client: Optional[AsyncHTTPHandler] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs,
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
    """
    Makes Anthropic `/v1/messages` API calls in the Anthropic API spec
    """

@@ -129,10 +143,8 @@ async def anthropic_messages(
        },
        custom_llm_provider=_custom_llm_provider,
    )

    # Prepare request body
    request_body = locals().copy()
    request_body = {
        k: v
        for k, v in request_body.items()

@@ -140,10 +152,12 @@ async def anthropic_messages(
        in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
            model=model
        )
        and v is not None
    }
    request_body["stream"] = stream
    request_body["model"] = model
    litellm_logging_obj.stream = stream
    litellm_logging_obj.model_call_details.update(request_body)

    # Make the request
    request_url = anthropic_messages_provider_config.get_complete_url(

@@ -164,7 +178,7 @@ async def anthropic_messages(
        url=request_url,
        headers=headers,
        data=json.dumps(request_body),
        stream=stream or False,
    )
    response.raise_for_status()

litellm/types/llms/anthropic_messages/anthropic_response.py — new file (83 lines)

@@ -0,0 +1,83 @@
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union

from typing_extensions import TypeAlias


class AnthropicResponseTextBlock(TypedDict, total=False):
    """
    Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
    """

    citations: Optional[List[Dict[str, Any]]]
    text: str
    type: Literal["text"]


class AnthropicResponseToolUseBlock(TypedDict, total=False):
    """
    Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
    """

    id: Optional[str]
    input: Optional[str]
    name: Optional[str]
    type: Literal["tool_use"]


class AnthropicResponseThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
    """

    signature: Optional[str]
    thinking: Optional[str]
    type: Literal["thinking"]


class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
    """

    data: Optional[str]
    type: Literal["redacted_thinking"]


AnthropicResponseContentBlock: TypeAlias = Union[
    AnthropicResponseTextBlock,
    AnthropicResponseToolUseBlock,
    AnthropicResponseThinkingBlock,
    AnthropicResponseRedactedThinkingBlock,
]


class AnthropicUsage(TypedDict, total=False):
    """
    Input and output tokens used in the request
    """

    input_tokens: int
    output_tokens: int

    # Cache tokens used
    cache_creation_input_tokens: int
    cache_read_input_tokens: int


class AnthropicMessagesResponse(TypedDict, total=False):
    """
    Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
    """

    content: Optional[List[AnthropicResponseContentBlock]]
    id: str
    model: Optional[str]  # This represents the Model type from Anthropic
    role: Optional[Literal["assistant"]]
    stop_reason: Optional[
        Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
    ]
    stop_sequence: Optional[str]
    type: Optional[Literal["message"]]
    usage: Optional[AnthropicUsage]
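
Since these are `total=False` TypedDicts, every key access should be guarded; a short sketch of narrowing the content blocks by their `type` tag (hypothetical usage, not from the PR):

```python
from typing import List

from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)


def extract_text(response: AnthropicMessagesResponse) -> str:
    """Hypothetical helper: join all text blocks in a typed response."""
    parts: List[str] = []
    for block in response.get("content") or []:
        # The union members share a literal `type` tag, so dispatch on it.
        if block.get("type") == "text":
            parts.append(block.get("text", ""))
    return "".join(parts)
```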

mypy.ini

@@ -2,6 +2,7 @@
warn_return_any = False
ignore_missing_imports = True
mypy_path = litellm/stubs
namespace_packages = True

[mypy-google.*]
ignore_missing_imports = True

@@ -8,7 +8,7 @@ import unittest.mock
from unittest.mock import AsyncMock, MagicMock

sys.path.insert(
    0, os.path.abspath("../../..")
)  # Adds the parent directory to the system path
import litellm
import pytest

@@ -16,6 +16,7 @@ from dotenv import load_dotenv
from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
    anthropic_messages,
)

from typing import Optional
from litellm.types.utils import StandardLoggingPayload
from litellm.integrations.custom_logger import CustomLogger

@@ -73,6 +74,7 @@ async def test_anthropic_messages_non_streaming():
    """
    Test the anthropic_messages with non-streaming request
    """
    litellm._turn_on_debug()
    # Get API key from environment
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:

@@ -82,7 +84,7 @@ async def test_anthropic_messages_non_streaming():
    messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]

    # Call the handler
    response = await litellm.anthropic.messages.acreate(
        messages=messages,
        api_key=api_key,
        model="claude-3-haiku-20240307",

@@ -114,7 +116,7 @@ async def test_anthropic_messages_streaming():

    # Call the handler
    async_httpx_client = AsyncHTTPHandler()
    response = await litellm.anthropic.messages.acreate(
        messages=messages,
        api_key=api_key,
        model="claude-3-haiku-20240307",

@@ -134,7 +136,7 @@ async def test_anthropic_messages_streaming_with_bad_request():
    Test the anthropic_messages with a bad streaming request
    """
    try:
        response = await litellm.anthropic.messages.acreate(
            messages=["hi"],
            api_key=os.getenv("ANTHROPIC_API_KEY"),
            model="claude-3-haiku-20240307",

@@ -458,7 +460,7 @@ async def test_anthropic_messages_with_extra_headers():
    mock_client.post = AsyncMock(return_value=mock_response)

    # Call the handler with extra_headers and our mocked client
    response = await litellm.anthropic.messages.acreate(
        messages=messages,
        api_key=api_key,
        model="claude-3-haiku-20240307",