diff --git a/docs/my-website/docs/anthropic_unified.md b/docs/my-website/docs/anthropic_unified.md
index cf6ba798d5..485571aa28 100644
--- a/docs/my-website/docs/anthropic_unified.md
+++ b/docs/my-website/docs/anthropic_unified.md
@@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem';
# /v1/messages [BETA]
-LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint.
+Use LiteLLM to call all your LLM APIs in the Anthropic `/v1/messages` format.
-This currently just supports the Anthropic API.
+
+## Overview
| Feature | Supported | Notes |
|-------|-------|-------|
@@ -21,9 +22,61 @@ Planned improvement:
- Bedrock Anthropic support
## Usage
+---
+
+### LiteLLM Python SDK
+
+#### Non-streaming example
+```python showLineNumbers title="Example using LiteLLM Python SDK"
+import os
+import litellm
+
+api_key = os.getenv("ANTHROPIC_API_KEY")
+
+response = await litellm.anthropic.messages.acreate(
+ messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
+ api_key=api_key,
+ model="anthropic/claude-3-haiku-20240307",
+ max_tokens=100,
+)
+```
+
+Example response:
+```json
+{
+ "content": [
+ {
+ "text": "Hi! this is a very short joke",
+ "type": "text"
+ }
+ ],
+ "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
+ "model": "claude-3-7-sonnet-20250219",
+ "role": "assistant",
+ "stop_reason": "end_turn",
+ "stop_sequence": null,
+ "type": "message",
+ "usage": {
+ "input_tokens": 2095,
+ "output_tokens": 503,
+ "cache_creation_input_tokens": 2095,
+ "cache_read_input_tokens": 0
+ }
+}
+```
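+
+The non-streaming response is a plain dict in the Anthropic messages format shown above, so the generated text can be read from the first content block. A minimal sketch, assuming the response shape above:
+
+```python showLineNumbers title="Reading the response"
+# the first content block holds the generated text
+text = response["content"][0]["text"]
+print(text)
+```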
+
+#### Streaming example
+```python showLineNumbers title="Example using LiteLLM Python SDK"
+import os
+import litellm
+
+api_key = os.getenv("ANTHROPIC_API_KEY")
+
+response = await litellm.anthropic.messages.acreate(
+ messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
+ api_key=api_key,
+ model="anthropic/claude-3-haiku-20240307",
+ max_tokens=100,
+ stream=True,
+)
+async for chunk in response:
+ print(chunk)
+```
+
+### LiteLLM Proxy Server
-
-
1. Setup config.yaml
@@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml
3. Test it!
-```bash
+
+
+
+```python showLineNumbers title="Example using LiteLLM Proxy Server"
+import anthropic
+
+# point anthropic sdk to litellm proxy
+client = anthropic.Anthropic(
+ base_url="http://0.0.0.0:4000",
+ api_key="sk-1234",
+)
+
+response = client.messages.create(
+ messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
+ model="anthropic/claude-3-haiku-20240307",
+ max_tokens=100,
+)
+```
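+
+The Anthropic SDK's streaming mode also works through the proxy. A sketch, assuming the same proxy settings as above and a `model_name` of `anthropic-claude` in the config:
+
+```python showLineNumbers title="Streaming example using LiteLLM Proxy Server"
+import anthropic
+
+client = anthropic.Anthropic(
+    base_url="http://0.0.0.0:4000",
+    api_key="sk-1234",
+)
+
+# stream=True returns an iterator of server-sent events
+stream = client.messages.create(
+    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
+    model="anthropic-claude",
+    max_tokens=100,
+    stream=True,
+)
+for event in stream:
+    print(event)
+```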
+
+
+
+```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
@@ -52,41 +126,176 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
"messages": [
{
"role": "user",
- "content": [
- {
- "type": "text",
- "text": "List 5 important events in the XIX century"
- }
- ]
+ "content": "Hello, can you tell me a short joke?"
}
],
- "max_tokens": 4096
+ "max_tokens": 100
}'
```
+
-
+
-```python
-from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages
-import asyncio
-import os
-# set env
-os.environ["ANTHROPIC_API_KEY"] = "my-api-key"
+## Request Format
+---
-messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]
+The request body uses the Anthropic messages API format. **LiteLLM follows the Anthropic messages specification for this endpoint.**
-# Call the handler
-async def call():
- response = await anthropic_messages(
- messages=messages,
- api_key=api_key,
- model="claude-3-haiku-20240307",
- max_tokens=100,
- )
+#### Example request body
-asyncio.run(call())
+```json
+{
+ "model": "claude-3-7-sonnet-20250219",
+ "max_tokens": 1024,
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hello, world"
+ }
+ ]
+}
```
-
-
\ No newline at end of file
+#### Required Fields
+- **model** (string):
+ The model identifier (e.g., `"claude-3-7-sonnet-20250219"`).
+- **max_tokens** (integer):
+ The maximum number of tokens to generate before stopping.
+ _Note: The model may stop before reaching this limit; value must be greater than 1._
+- **messages** (array of objects):
+ An ordered list of conversational turns.
+ Each message object must include:
+ - **role** (enum: `"user"` or `"assistant"`):
+ Specifies the speaker of the message.
+ - **content** (string or array of content blocks):
+ The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message.
+ _Example equivalence:_
+ ```json
+ {"role": "user", "content": "Hello, Claude"}
+ ```
+ is equivalent to:
+ ```json
+ {"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]}
+ ```
+
+#### Optional Fields
+- **metadata** (object):
+ Contains additional metadata about the request (e.g., `user_id` as an opaque identifier).
+- **stop_sequences** (array of strings):
+ Custom sequences that, when encountered in the generated text, cause the model to stop.
+- **stream** (boolean):
+ Indicates whether to stream the response using server-sent events.
+- **system** (string or array):
+ A system prompt providing context or specific instructions to the model.
+- **temperature** (number):
+  Controls randomness in the model’s responses. Valid range: `0.0` to `1.0`.
+- **thinking** (object):
+ Configuration for enabling extended thinking. If enabled, it includes:
+ - **budget_tokens** (integer):
+ Minimum of 1024 tokens (and less than `max_tokens`).
+ - **type** (enum):
+ E.g., `"enabled"`.
+- **tool_choice** (object):
+ Instructs how the model should utilize any provided tools.
+- **tools** (array of objects):
+ Definitions for tools available to the model. Each tool includes:
+ - **name** (string):
+ The tool’s name.
+ - **description** (string):
+ A detailed description of the tool.
+ - **input_schema** (object):
+ A JSON schema describing the expected input format for the tool.
+- **top_k** (integer):
+ Limits sampling to the top K options.
+- **top_p** (number):
+ Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`.
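+
+These optional fields map directly onto keyword arguments of the LiteLLM SDK call. A short sketch that sets a few of them (the model name and prompt are only examples):
+
+```python showLineNumbers title="Optional fields via the LiteLLM Python SDK"
+import os
+import litellm
+
+response = await litellm.anthropic.messages.acreate(
+    model="anthropic/claude-3-haiku-20240307",
+    max_tokens=200,
+    system="You are a terse assistant.",
+    temperature=0.2,
+    stop_sequences=["###"],
+    messages=[{"role": "user", "content": "Explain nucleus sampling in one sentence."}],
+    api_key=os.getenv("ANTHROPIC_API_KEY"),
+)
+```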
+
+
+## Response Format
+---
+
+Responses will be in the Anthropic messages API format.
+
+#### Example Response
+
+```json
+{
+ "content": [
+ {
+ "text": "Hi! My name is Claude.",
+ "type": "text"
+ }
+ ],
+ "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
+ "model": "claude-3-7-sonnet-20250219",
+ "role": "assistant",
+ "stop_reason": "end_turn",
+ "stop_sequence": null,
+ "type": "message",
+ "usage": {
+ "input_tokens": 2095,
+ "output_tokens": 503,
+ "cache_creation_input_tokens": 2095,
+ "cache_read_input_tokens": 0
+ }
+}
+```
+
+#### Response fields
+
+- **content** (array of objects):
+ Contains the generated content blocks from the model. Each block includes:
+ - **type** (string):
+ Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`).
+ - **text** (string):
+ The generated text from the model.
+ _Note: Maximum length is 5,000,000 characters._
+ - **citations** (array of objects or `null`):
+ Optional field providing citation details. Each citation includes:
+ - **cited_text** (string):
+ The excerpt being cited.
+ - **document_index** (integer):
+ An index referencing the cited document.
+ - **document_title** (string or `null`):
+ The title of the cited document.
+ - **start_char_index** (integer):
+ The starting character index for the citation.
+ - **end_char_index** (integer):
+ The ending character index for the citation.
+ - **type** (string):
+ Typically `"char_location"`.
+
+- **id** (string):
+ A unique identifier for the response message.
+ _Note: The format and length of IDs may change over time._
+
+- **model** (string):
+ Specifies the model that generated the response.
+
+- **role** (string):
+ Indicates the role of the generated message. For responses, this is always `"assistant"`.
+
+- **stop_reason** (string):
+ Explains why the model stopped generating text. Possible values include:
+ - `"end_turn"`: The model reached a natural stopping point.
+ - `"max_tokens"`: The generation stopped because the maximum token limit was reached.
+ - `"stop_sequence"`: A custom stop sequence was encountered.
+ - `"tool_use"`: The model invoked one or more tools.
+
+- **stop_sequence** (string or `null`):
+ Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`.
+
+- **type** (string):
+ Denotes the type of response object, which is always `"message"`.
+
+- **usage** (object):
+ Provides details on token usage for billing and rate limiting. This includes:
+ - **input_tokens** (integer):
+ Total number of input tokens processed.
+ - **output_tokens** (integer):
+ Total number of output tokens generated.
+ - **cache_creation_input_tokens** (integer or `null`):
+ Number of tokens used to create a cache entry.
+ - **cache_read_input_tokens** (integer or `null`):
+ Number of tokens read from the cache.
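+
+The response fields above can be inspected directly on the returned dict. A small sketch that checks why generation stopped and tallies token usage, assuming a response shaped like the example above:
+
+```python showLineNumbers title="Inspecting the response"
+if response["stop_reason"] == "max_tokens":
+    # the reply was truncated; consider retrying with a larger max_tokens
+    pass
+
+usage = response["usage"]
+total_tokens = usage["input_tokens"] + usage["output_tokens"]
+print(f"stop_reason={response['stop_reason']}, total_tokens={total_tokens}")
+```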
diff --git a/litellm/__init__.py b/litellm/__init__.py
index c2e366e2b1..9997b9a8ac 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token
### ADAPTERS ###
from .types.adapter import AdapterItem
+import litellm.anthropic_interface as anthropic
adapters: List[AdapterItem] = []
diff --git a/litellm/anthropic_interface/__init__.py b/litellm/anthropic_interface/__init__.py
new file mode 100644
index 0000000000..9902fdc553
--- /dev/null
+++ b/litellm/anthropic_interface/__init__.py
@@ -0,0 +1,6 @@
+"""
+Anthropic module for LiteLLM
+"""
+from .messages import acreate, create
+
+__all__ = ["acreate", "create"]
diff --git a/litellm/anthropic_interface/messages/__init__.py b/litellm/anthropic_interface/messages/__init__.py
new file mode 100644
index 0000000000..f3249f981b
--- /dev/null
+++ b/litellm/anthropic_interface/messages/__init__.py
@@ -0,0 +1,117 @@
+"""
+Interface for Anthropic's messages API
+
+Use this to call LLMs in Anthropic /messages Request/Response format
+
+This is an __init__.py file to expose the following interface:
+
+- litellm.anthropic.messages.acreate
+- litellm.anthropic.messages.create
+
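+Example usage (run inside an async context; the model name below is illustrative):
+
+    import os
+    import litellm
+
+    response = await litellm.anthropic.messages.acreate(
+        messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
+        api_key=os.getenv("ANTHROPIC_API_KEY"),
+        model="anthropic/claude-3-haiku-20240307",
+        max_tokens=100,
+    )
+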
+"""
+
+from typing import AsyncIterator, Dict, Iterator, List, Optional, Union
+
+from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
+ anthropic_messages as _async_anthropic_messages,
+)
+from litellm.types.llms.anthropic_messages.anthropic_response import (
+ AnthropicMessagesResponse,
+)
+
+
+async def acreate(
+ max_tokens: int,
+ messages: List[Dict],
+ model: str,
+ metadata: Optional[Dict] = None,
+ stop_sequences: Optional[List[str]] = None,
+ stream: Optional[bool] = False,
+ system: Optional[str] = None,
+ temperature: Optional[float] = 1.0,
+ thinking: Optional[Dict] = None,
+ tool_choice: Optional[Dict] = None,
+ tools: Optional[List[Dict]] = None,
+ top_k: Optional[int] = None,
+ top_p: Optional[float] = None,
+ **kwargs
+) -> Union[AnthropicMessagesResponse, AsyncIterator]:
+ """
+ Async wrapper for Anthropic's messages API
+
+ Args:
+ max_tokens (int): Maximum tokens to generate (required)
+ messages (List[Dict]): List of message objects with role and content (required)
+ model (str): Model name to use (required)
+ metadata (Dict, optional): Request metadata
+ stop_sequences (List[str], optional): Custom stop sequences
+ stream (bool, optional): Whether to stream the response
+ system (str, optional): System prompt
+ temperature (float, optional): Sampling temperature (0.0 to 1.0)
+ thinking (Dict, optional): Extended thinking configuration
+ tool_choice (Dict, optional): Tool choice configuration
+ tools (List[Dict], optional): List of tool definitions
+ top_k (int, optional): Top K sampling parameter
+ top_p (float, optional): Nucleus sampling parameter
+ **kwargs: Additional arguments
+
+ Returns:
+        AnthropicMessagesResponse: Response from the API, or an AsyncIterator of response chunks when stream=True
+ """
+ return await _async_anthropic_messages(
+ max_tokens=max_tokens,
+ messages=messages,
+ model=model,
+ metadata=metadata,
+ stop_sequences=stop_sequences,
+ stream=stream,
+ system=system,
+ temperature=temperature,
+ thinking=thinking,
+ tool_choice=tool_choice,
+ tools=tools,
+ top_k=top_k,
+ top_p=top_p,
+ **kwargs,
+ )
+
+
+def create(
+ max_tokens: int,
+ messages: List[Dict],
+ model: str,
+ metadata: Optional[Dict] = None,
+ stop_sequences: Optional[List[str]] = None,
+ stream: Optional[bool] = False,
+ system: Optional[str] = None,
+ temperature: Optional[float] = 1.0,
+ thinking: Optional[Dict] = None,
+ tool_choice: Optional[Dict] = None,
+ tools: Optional[List[Dict]] = None,
+ top_k: Optional[int] = None,
+ top_p: Optional[float] = None,
+ **kwargs
+) -> Union[AnthropicMessagesResponse, Iterator]:
+ """
+    Synchronous wrapper for Anthropic's messages API (not implemented yet; use acreate instead)
+
+ Args:
+ max_tokens (int): Maximum tokens to generate (required)
+ messages (List[Dict]): List of message objects with role and content (required)
+ model (str): Model name to use (required)
+ metadata (Dict, optional): Request metadata
+ stop_sequences (List[str], optional): Custom stop sequences
+ stream (bool, optional): Whether to stream the response
+ system (str, optional): System prompt
+ temperature (float, optional): Sampling temperature (0.0 to 1.0)
+ thinking (Dict, optional): Extended thinking configuration
+ tool_choice (Dict, optional): Tool choice configuration
+ tools (List[Dict], optional): List of tool definitions
+ top_k (int, optional): Top K sampling parameter
+ top_p (float, optional): Nucleus sampling parameter
+ **kwargs: Additional arguments
+
+ Returns:
+ Dict: Response from the API
+ """
+ raise NotImplementedError("This function is not implemented")
diff --git a/litellm/anthropic_interface/readme.md b/litellm/anthropic_interface/readme.md
new file mode 100644
index 0000000000..01c5f1b7c3
--- /dev/null
+++ b/litellm/anthropic_interface/readme.md
@@ -0,0 +1,116 @@
+## Use LLM API endpoints in the Anthropic `/v1/messages` interface
+
+Note: this module is named `anthropic_interface` because `anthropic` is already the name of a published Python package, and reusing that name caused mypy type-checking failures.
+
+
+## Usage
+---
+
+### LiteLLM Python SDK
+
+#### Non-streaming example
+```python showLineNumbers title="Example using LiteLLM Python SDK"
+import os
+import litellm
+
+api_key = os.getenv("ANTHROPIC_API_KEY")
+
+response = await litellm.anthropic.messages.acreate(
+ messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
+ api_key=api_key,
+ model="anthropic/claude-3-haiku-20240307",
+ max_tokens=100,
+)
+```
+
+Example response:
+```json
+{
+ "content": [
+ {
+ "text": "Hi! this is a very short joke",
+ "type": "text"
+ }
+ ],
+ "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
+ "model": "claude-3-7-sonnet-20250219",
+ "role": "assistant",
+ "stop_reason": "end_turn",
+ "stop_sequence": null,
+ "type": "message",
+ "usage": {
+ "input_tokens": 2095,
+ "output_tokens": 503,
+ "cache_creation_input_tokens": 2095,
+ "cache_read_input_tokens": 0
+ }
+}
+```
+
+#### Streaming example
+```python showLineNumbers title="Example using LiteLLM Python SDK"
+import os
+import litellm
+
+api_key = os.getenv("ANTHROPIC_API_KEY")
+
+response = await litellm.anthropic.messages.acreate(
+ messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
+ api_key=api_key,
+ model="anthropic/claude-3-haiku-20240307",
+ max_tokens=100,
+ stream=True,
+)
+async for chunk in response:
+ print(chunk)
+```
+
+### LiteLLM Proxy Server
+
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+ - model_name: anthropic-claude
+ litellm_params:
+ model: claude-3-7-sonnet-latest
+```
+
+2. Start proxy
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!
+
+
+
+
+```python showLineNumbers title="Example using LiteLLM Proxy Server"
+import anthropic
+
+# point anthropic sdk to litellm proxy
+client = anthropic.Anthropic(
+ base_url="http://0.0.0.0:4000",
+ api_key="sk-1234",
+)
+
+response = client.messages.create(
+ messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
+ model="anthropic/claude-3-haiku-20240307",
+ max_tokens=100,
+)
+```
+
+
+
+```bash showLineNumbers title="Example using LiteLLM Proxy Server"
+curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
+-H 'content-type: application/json' \
+-H 'x-api-key: $LITELLM_API_KEY' \
+-H 'anthropic-version: 2023-06-01' \
+-d '{
+ "model": "anthropic-claude",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hello, can you tell me a short joke?"
+ }
+ ],
+ "max_tokens": 100
+}'
+```
\ No newline at end of file
diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
index 099a2acdae..a37d816770 100644
--- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
+++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
@@ -6,7 +6,7 @@
"""
import json
-from typing import Any, AsyncIterator, Dict, Optional, Union, cast
+from typing import AsyncIterator, Dict, List, Optional, Union, cast
import httpx
@@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
+from litellm.types.llms.anthropic_messages.anthropic_response import (
+ AnthropicMessagesResponse,
+)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import ProviderSpecificHeader
from litellm.utils import ProviderConfigManager, client
@@ -60,14 +63,25 @@ class AnthropicMessagesHandler:
@client
async def anthropic_messages(
- api_key: str,
+ max_tokens: int,
+ messages: List[Dict],
model: str,
- stream: bool = False,
+ metadata: Optional[Dict] = None,
+ stop_sequences: Optional[List[str]] = None,
+ stream: Optional[bool] = False,
+ system: Optional[str] = None,
+ temperature: Optional[float] = None,
+ thinking: Optional[Dict] = None,
+ tool_choice: Optional[Dict] = None,
+ tools: Optional[List[Dict]] = None,
+ top_k: Optional[int] = None,
+ top_p: Optional[float] = None,
+ api_key: Optional[str] = None,
api_base: Optional[str] = None,
client: Optional[AsyncHTTPHandler] = None,
custom_llm_provider: Optional[str] = None,
**kwargs,
-) -> Union[Dict[str, Any], AsyncIterator]:
+) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec
"""
@@ -129,10 +143,8 @@ async def anthropic_messages(
},
custom_llm_provider=_custom_llm_provider,
)
- litellm_logging_obj.model_call_details.update(kwargs)
-
# Prepare request body
- request_body = kwargs.copy()
+ request_body = locals().copy()
request_body = {
k: v
for k, v in request_body.items()
@@ -140,10 +152,12 @@ async def anthropic_messages(
in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
model=model
)
+ and v is not None
}
request_body["stream"] = stream
request_body["model"] = model
litellm_logging_obj.stream = stream
+ litellm_logging_obj.model_call_details.update(request_body)
# Make the request
request_url = anthropic_messages_provider_config.get_complete_url(
@@ -164,7 +178,7 @@ async def anthropic_messages(
url=request_url,
headers=headers,
data=json.dumps(request_body),
- stream=stream,
+ stream=stream or False,
)
response.raise_for_status()
diff --git a/litellm/types/llms/anthropic_messages/anthropic_response.py b/litellm/types/llms/anthropic_messages/anthropic_response.py
new file mode 100644
index 0000000000..270807fc8f
--- /dev/null
+++ b/litellm/types/llms/anthropic_messages/anthropic_response.py
@@ -0,0 +1,83 @@
+from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
+
+from typing_extensions import TypeAlias
+
+
+class AnthropicResponseTextBlock(TypedDict, total=False):
+ """
+ Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
+ """
+
+ citations: Optional[List[Dict[str, Any]]]
+ text: str
+ type: Literal["text"]
+
+
+class AnthropicResponseToolUseBlock(TypedDict, total=False):
+ """
+ Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
+ """
+
+ id: Optional[str]
+ input: Optional[str]
+ name: Optional[str]
+ type: Literal["tool_use"]
+
+
+class AnthropicResponseThinkingBlock(TypedDict, total=False):
+ """
+ Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
+ """
+
+ signature: Optional[str]
+ thinking: Optional[str]
+ type: Literal["thinking"]
+
+
+class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
+ """
+ Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
+ """
+
+ data: Optional[str]
+ type: Literal["redacted_thinking"]
+
+
+AnthropicResponseContentBlock: TypeAlias = Union[
+ AnthropicResponseTextBlock,
+ AnthropicResponseToolUseBlock,
+ AnthropicResponseThinkingBlock,
+ AnthropicResponseRedactedThinkingBlock,
+]
+
+
+class AnthropicUsage(TypedDict, total=False):
+ """
+ Input and output tokens used in the request
+ """
+
+ input_tokens: int
+ output_tokens: int
+
+ """
+ Cache Tokens Used
+ """
+ cache_creation_input_tokens: int
+ cache_read_input_tokens: int
+
+
+class AnthropicMessagesResponse(TypedDict, total=False):
+ """
+ Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
+ """
+
+ content: Optional[List[AnthropicResponseContentBlock]]
+ id: str
+ model: Optional[str] # This represents the Model type from Anthropic
+ role: Optional[Literal["assistant"]]
+ stop_reason: Optional[
+ Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
+ ]
+ stop_sequence: Optional[str]
+ type: Optional[Literal["message"]]
+ usage: Optional[AnthropicUsage]
diff --git a/mypy.ini b/mypy.ini
index 19ead3ba7d..3ce8c5fcc0 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -2,6 +2,7 @@
warn_return_any = False
ignore_missing_imports = True
mypy_path = litellm/stubs
+namespace_packages = True
[mypy-google.*]
ignore_missing_imports = True
diff --git a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py
index b5b3302acc..ec268b1a24 100644
--- a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py
+++ b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py
@@ -8,7 +8,7 @@ import unittest.mock
from unittest.mock import AsyncMock, MagicMock
sys.path.insert(
- 0, os.path.abspath("../..")
+ 0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
import litellm
import pytest
@@ -16,6 +16,7 @@ from dotenv import load_dotenv
from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
anthropic_messages,
)
+
from typing import Optional
from litellm.types.utils import StandardLoggingPayload
from litellm.integrations.custom_logger import CustomLogger
@@ -73,6 +74,7 @@ async def test_anthropic_messages_non_streaming():
"""
Test the anthropic_messages with non-streaming request
"""
+ litellm._turn_on_debug()
# Get API key from environment
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
@@ -82,7 +84,7 @@ async def test_anthropic_messages_non_streaming():
messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]
# Call the handler
- response = await anthropic_messages(
+ response = await litellm.anthropic.messages.acreate(
messages=messages,
api_key=api_key,
model="claude-3-haiku-20240307",
@@ -114,7 +116,7 @@ async def test_anthropic_messages_streaming():
# Call the handler
async_httpx_client = AsyncHTTPHandler()
- response = await anthropic_messages(
+ response = await litellm.anthropic.messages.acreate(
messages=messages,
api_key=api_key,
model="claude-3-haiku-20240307",
@@ -134,7 +136,7 @@ async def test_anthropic_messages_streaming_with_bad_request():
Test the anthropic_messages with streaming request
"""
try:
- response = await anthropic_messages(
+ response = await litellm.anthropic.messages.acreate(
messages=["hi"],
api_key=os.getenv("ANTHROPIC_API_KEY"),
model="claude-3-haiku-20240307",
@@ -458,7 +460,7 @@ async def test_anthropic_messages_with_extra_headers():
mock_client.post = AsyncMock(return_value=mock_response)
# Call the handler with extra_headers and our mocked client
- response = await anthropic_messages(
+ response = await litellm.anthropic.messages.acreate(
messages=messages,
api_key=api_key,
model="claude-3-haiku-20240307",