From e93f84b347df6d16d53353c175e54c376d2123ea Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 26 Mar 2025 16:53:33 -0700
Subject: [PATCH 01/15] add type hints for AnthropicMessagesResponse

---
 .../anthropic_messages/anthropic_response.py | 85 +++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 litellm/types/llms/anthropic_messages/anthropic_response.py

diff --git a/litellm/types/llms/anthropic_messages/anthropic_response.py b/litellm/types/llms/anthropic_messages/anthropic_response.py
new file mode 100644
index 0000000000..e260220776
--- /dev/null
+++ b/litellm/types/llms/anthropic_messages/anthropic_response.py
@@ -0,0 +1,85 @@
+from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
+
+import anthropic
+from pydantic import PropertyInfo
+from typing_extensions import Annotated, TypeAlias
+
+
+class AnthropicResponseTextBlock(TypedDict, total=False):
+    """
+    Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
+    """
+
+    citations: Optional[List[Dict[str, Any]]]
+    text: str
+    type: Literal["text"]
+
+
+class AnthropicResponseToolUseBlock(TypedDict, total=False):
+    """
+    Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
+    """
+
+    id: Optional[str]
+    input: Optional[str]
+    name: Optional[str]
+    type: Literal["tool_use"]
+
+
+class AnthropicResponseThinkingBlock(TypedDict, total=False):
+    """
+    Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
+    """
+
+    signature: Optional[str]
+    thinking: Optional[str]
+    type: Literal["thinking"]
+
+
+class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
+    """
+    Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
+    """
+
+    data: Optional[str]
+    type: Literal["redacted_thinking"]
+
+
+AnthropicResponseContentBlock: TypeAlias = Union[
+    AnthropicResponseTextBlock,
+    AnthropicResponseToolUseBlock,
+    AnthropicResponseThinkingBlock,
+    AnthropicResponseRedactedThinkingBlock,
+]
+
+
+class AnthropicUsage(TypedDict, total=False):
+    """
+    Input and output tokens used in the request
+    """
+
+    input_tokens: int
+    output_tokens: int
+
+    """
+    Cache Tokens Used
+    """
+    cache_creation_input_tokens: int
+    cache_read_input_tokens: int
+
+
+class AnthropicMessagesResponse(TypedDict, total=False):
+    """
+    Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
+    """
+
+    content: Optional[List[AnthropicResponseContentBlock]]
+    id: str
+    model: Optional[str]  # This represents the Model type from Anthropic
+    role: Optional[Literal["assistant"]]
+    stop_reason: Optional[
+        Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
+    ]
+    stop_sequence: Optional[str]
+    type: Optional[Literal["message"]]
+    usage: Optional[AnthropicUsage]

From 957b7eb82c803d79fe9c914fba70e4af3530fc73 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 26 Mar 2025 16:54:45 -0700
Subject: [PATCH 02/15] define types for response from AnthropicMessagesResponse

---
 .../anthropic/experimental_pass_through/messages/handler.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
index a7dfff74d9..9b890db266 100644
--- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
+++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
@@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
    get_async_httpx_client,
 )
+from litellm.types.llms.anthropic_messages.anthropic_response import (
+    AnthropicMessagesResponse,
+)
 from litellm.types.router import GenericLiteLLMParams
 from litellm.types.utils import ProviderSpecificHeader
 from litellm.utils import ProviderConfigManager, client
@@ -68,7 +71,7 @@ async def anthropic_messages(
     client: Optional[AsyncHTTPHandler] = None,
     custom_llm_provider: Optional[str] = None,
     **kwargs,
-) -> Union[Dict[str, Any], AsyncIterator]:
+) -> Union[AnthropicMessagesResponse, AsyncIterator]:
     """
     Makes Anthropic `/v1/messages` API calls in the Anthropic API spec
     """

From 968ef4a299dff3a7a476a4f24ba40a6fdb2df9fa Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 26 Mar 2025 16:56:56 -0700
Subject: [PATCH 03/15] fix response typing

---
 litellm/types/llms/anthropic_messages/anthropic_response.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/litellm/types/llms/anthropic_messages/anthropic_response.py b/litellm/types/llms/anthropic_messages/anthropic_response.py
index e260220776..270807fc8f 100644
--- a/litellm/types/llms/anthropic_messages/anthropic_response.py
+++ b/litellm/types/llms/anthropic_messages/anthropic_response.py
@@ -1,8 +1,6 @@
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
 
-import anthropic
-from pydantic import PropertyInfo
-from typing_extensions import Annotated, TypeAlias
+from typing_extensions import TypeAlias
 
 
 class AnthropicResponseTextBlock(TypedDict, total=False):

From 07dce8bed324fd0d60c088d80b2362452b74fb34 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 26 Mar 2025 17:05:38 -0700
Subject: [PATCH 04/15] allow using litellm.messages.acreate and litellm.messages.create

---
 litellm/__init__.py          |  1 +
 litellm/messages/__init__.py | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 litellm/messages/__init__.py

diff --git a/litellm/__init__.py b/litellm/__init__.py
index a59484b035..0080e9551c 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -26,6 +26,7 @@ from litellm._logging import (
     log_level,
 )
 import re
+from .messages import *
 from litellm.constants import (
     DEFAULT_BATCH_SIZE,
     DEFAULT_FLUSH_INTERVAL_SECONDS,

diff --git a/litellm/messages/__init__.py b/litellm/messages/__init__.py
new file mode 100644
index 0000000000..1274f768ba
--- /dev/null
+++ b/litellm/messages/__init__.py
@@ -0,0 +1,16 @@
+"""
+Interface for Anthropic's messages API
+
+Use this to call LLMs in Anthropic /messages Request/Response format
+"""
+
+from litellm.llms.anthropic.experimental_pass_through.handler import (
+    anthropic_messages as _async_anthropic_messages,
+)
+
+
+async def acreate(*args, **kwargs):
+    """
+    Wrapper around Anthropic's messages API
+    """
+    return await _async_anthropic_messages(*args, **kwargs)

From 3640262dbfacccefd3f8b70ac2677a18b4e3a345 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 26 Mar 2025 17:12:40 -0700
Subject: [PATCH 05/15] fix anthropic_messages implementation

---
 .../messages/handler.py      | 21 +++-
 litellm/messages/__init__.py | 98 ++++++++++++++++++-
 2 files changed, 111 insertions(+), 8 deletions(-)

diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
index 9b890db266..54826a38ba 100644
--- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
+++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
@@ -6,7 +6,7 @@
 """
 
 import json
-from typing import Any, AsyncIterator, Dict, Optional,
Union, cast +from typing import AsyncIterator, Dict, List, Optional, Union, cast import httpx @@ -64,9 +64,20 @@ class AnthropicMessagesHandler: @client async def anthropic_messages( - api_key: str, + max_tokens: int, + messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], model: str, - stream: bool = False, + metadata: Optional[Dict] = None, + stop_sequences: Optional[List[str]] = None, + stream: Optional[bool] = False, + system: Optional[str] = None, + temperature: Optional[float] = 1.0, + thinking: Optional[Dict] = None, + tool_choice: Optional[Dict] = None, + tools: Optional[List[Dict]] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + api_key: Optional[str] = None, api_base: Optional[str] = None, client: Optional[AsyncHTTPHandler] = None, custom_llm_provider: Optional[str] = None, @@ -133,7 +144,7 @@ async def anthropic_messages( litellm_logging_obj.model_call_details.update(kwargs) # Prepare request body - request_body = kwargs.copy() + request_body = locals().copy() request_body = { k: v for k, v in request_body.items() @@ -165,7 +176,7 @@ async def anthropic_messages( url=request_url, headers=headers, data=json.dumps(request_body), - stream=stream, + stream=stream or False, ) response.raise_for_status() diff --git a/litellm/messages/__init__.py b/litellm/messages/__init__.py index 1274f768ba..0a4ea92c2c 100644 --- a/litellm/messages/__init__.py +++ b/litellm/messages/__init__.py @@ -4,13 +4,105 @@ Interface for Anthropic's messages API Use this to call LLMs in Anthropic /messages Request/Response format """ +from typing import Dict, List, Optional, Union + from litellm.llms.anthropic.experimental_pass_through.handler import ( anthropic_messages as _async_anthropic_messages, ) -async def acreate(*args, **kwargs): +async def acreate( + max_tokens: int, + messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], + model: str, + metadata: Optional[Dict] = None, + stop_sequences: Optional[List[str]] = None, + stream: Optional[bool] = False, + system: Optional[str] = None, + temperature: Optional[float] = 1.0, + thinking: Optional[Dict] = None, + tool_choice: Optional[Dict] = None, + tools: Optional[List[Dict]] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs +) -> Dict: """ - Wrapper around Anthropic's messages API + Async wrapper for Anthropic's messages API + + Args: + max_tokens (int): Maximum tokens to generate (required) + messages (List[Dict]): List of message objects with role and content (required) + model (str): Model name to use (required) + metadata (Dict, optional): Request metadata + stop_sequences (List[str], optional): Custom stop sequences + stream (bool, optional): Whether to stream the response + system (str, optional): System prompt + temperature (float, optional): Sampling temperature (0.0 to 1.0) + thinking (Dict, optional): Extended thinking configuration + tool_choice (Dict, optional): Tool choice configuration + tools (List[Dict], optional): List of tool definitions + top_k (int, optional): Top K sampling parameter + top_p (float, optional): Nucleus sampling parameter + **kwargs: Additional arguments + + Returns: + Dict: Response from the API """ - return await _async_anthropic_messages(*args, **kwargs) + return await _async_anthropic_messages( + max_tokens=max_tokens, + messages=messages, + model=model, + metadata=metadata, + stop_sequences=stop_sequences, + stream=stream, + system=system, + temperature=temperature, + thinking=thinking, + tool_choice=tool_choice, + tools=tools, + top_k=top_k, + 
top_p=top_p, + **kwargs, + ) + + +async def create( + max_tokens: int, + messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], + model: str, + metadata: Optional[Dict] = None, + stop_sequences: Optional[List[str]] = None, + stream: Optional[bool] = False, + system: Optional[str] = None, + temperature: Optional[float] = 1.0, + thinking: Optional[Dict] = None, + tool_choice: Optional[Dict] = None, + tools: Optional[List[Dict]] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + **kwargs +) -> Dict: + """ + Async wrapper for Anthropic's messages API + + Args: + max_tokens (int): Maximum tokens to generate (required) + messages (List[Dict]): List of message objects with role and content (required) + model (str): Model name to use (required) + metadata (Dict, optional): Request metadata + stop_sequences (List[str], optional): Custom stop sequences + stream (bool, optional): Whether to stream the response + system (str, optional): System prompt + temperature (float, optional): Sampling temperature (0.0 to 1.0) + thinking (Dict, optional): Extended thinking configuration + tool_choice (Dict, optional): Tool choice configuration + tools (List[Dict], optional): List of tool definitions + top_k (int, optional): Top K sampling parameter + top_p (float, optional): Nucleus sampling parameter + **kwargs: Additional arguments + + Returns: + Dict: Response from the API + """ + raise NotImplementedError("This function is not implemented") From 1b085a306a6853a3e5aebc78800771dce2e3b2a3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Mar 2025 17:17:56 -0700 Subject: [PATCH 06/15] add clear type hints to litellm.messages.create functions --- litellm/messages/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/litellm/messages/__init__.py b/litellm/messages/__init__.py index 0a4ea92c2c..b2be67bbd9 100644 --- a/litellm/messages/__init__.py +++ b/litellm/messages/__init__.py @@ -9,6 +9,9 @@ from typing import Dict, List, Optional, Union from litellm.llms.anthropic.experimental_pass_through.handler import ( anthropic_messages as _async_anthropic_messages, ) +from litellm.types.llms.anthropic_messages.anthropic_response import ( + AnthropicMessagesResponse, +) async def acreate( @@ -26,7 +29,7 @@ async def acreate( top_k: Optional[int] = None, top_p: Optional[float] = None, **kwargs -) -> Dict: +) -> AnthropicMessagesResponse: """ Async wrapper for Anthropic's messages API @@ -82,7 +85,7 @@ async def create( top_k: Optional[int] = None, top_p: Optional[float] = None, **kwargs -) -> Dict: +) -> AnthropicMessagesResponse: """ Async wrapper for Anthropic's messages API From 8dcdff92803c916dd6e9fb3638cfad24cd2bde64 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Mar 2025 17:21:14 -0700 Subject: [PATCH 07/15] fix anthropic_messages --- .../anthropic/experimental_pass_through/messages/handler.py | 2 +- litellm/messages/__init__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py index 54826a38ba..0b809a4460 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py @@ -65,7 +65,7 @@ class AnthropicMessagesHandler: @client async def anthropic_messages( max_tokens: int, - messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], + messages: List[Dict], model: str, metadata: 
Optional[Dict] = None, stop_sequences: Optional[List[str]] = None, diff --git a/litellm/messages/__init__.py b/litellm/messages/__init__.py index b2be67bbd9..c5cb3f5330 100644 --- a/litellm/messages/__init__.py +++ b/litellm/messages/__init__.py @@ -16,7 +16,7 @@ from litellm.types.llms.anthropic_messages.anthropic_response import ( async def acreate( max_tokens: int, - messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], + messages: List[Dict], model: str, metadata: Optional[Dict] = None, stop_sequences: Optional[List[str]] = None, @@ -72,7 +72,7 @@ async def acreate( async def create( max_tokens: int, - messages: List[Dict[str, Union[str, List[Dict[str, str]]]]], + messages: List[Dict], model: str, metadata: Optional[Dict] = None, stop_sequences: Optional[List[str]] = None, From 9eb9a369bbfc301f4998e555fa3082c29aea4dd1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Mar 2025 17:34:41 -0700 Subject: [PATCH 08/15] working anthropic API tests --- litellm/__init__.py | 2 +- .../experimental_pass_through/messages/handler.py | 3 ++- litellm/messages/__init__.py | 14 ++++++++++---- .../test_anthropic_messages_passthrough.py | 12 +++++++----- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index 0080e9551c..4e1410a319 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -26,7 +26,6 @@ from litellm._logging import ( log_level, ) import re -from .messages import * from litellm.constants import ( DEFAULT_BATCH_SIZE, DEFAULT_FLUSH_INTERVAL_SECONDS, @@ -1028,6 +1027,7 @@ from .proxy.proxy_cli import run_server from .router import Router from .assistants.main import * from .batches.main import * +from .messages import * from .batch_completion.main import * # type: ignore from .rerank_api.main import * from .llms.anthropic.experimental_pass_through.messages.handler import * diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py index 0b809a4460..fd7a3d60a8 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py @@ -71,7 +71,7 @@ async def anthropic_messages( stop_sequences: Optional[List[str]] = None, stream: Optional[bool] = False, system: Optional[str] = None, - temperature: Optional[float] = 1.0, + temperature: Optional[float] = None, thinking: Optional[Dict] = None, tool_choice: Optional[Dict] = None, tools: Optional[List[Dict]] = None, @@ -152,6 +152,7 @@ async def anthropic_messages( in anthropic_messages_provider_config.get_supported_anthropic_messages_params( model=model ) + and v is not None } request_body["stream"] = stream request_body["model"] = model diff --git a/litellm/messages/__init__.py b/litellm/messages/__init__.py index c5cb3f5330..f3249f981b 100644 --- a/litellm/messages/__init__.py +++ b/litellm/messages/__init__.py @@ -2,11 +2,17 @@ Interface for Anthropic's messages API Use this to call LLMs in Anthropic /messages Request/Response format + +This is an __init__.py file to allow the following interface + +- litellm.messages.acreate +- litellm.messages.create + """ -from typing import Dict, List, Optional, Union +from typing import AsyncIterator, Dict, Iterator, List, Optional, Union -from litellm.llms.anthropic.experimental_pass_through.handler import ( +from litellm.llms.anthropic.experimental_pass_through.messages.handler import ( anthropic_messages as _async_anthropic_messages, ) from 
litellm.types.llms.anthropic_messages.anthropic_response import ( @@ -29,7 +35,7 @@ async def acreate( top_k: Optional[int] = None, top_p: Optional[float] = None, **kwargs -) -> AnthropicMessagesResponse: +) -> Union[AnthropicMessagesResponse, AsyncIterator]: """ Async wrapper for Anthropic's messages API @@ -85,7 +91,7 @@ async def create( top_k: Optional[int] = None, top_p: Optional[float] = None, **kwargs -) -> AnthropicMessagesResponse: +) -> Union[AnthropicMessagesResponse, Iterator]: """ Async wrapper for Anthropic's messages API diff --git a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py index b5b3302acc..cb7ac0d3bf 100644 --- a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py +++ b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py @@ -8,7 +8,7 @@ import unittest.mock from unittest.mock import AsyncMock, MagicMock sys.path.insert( - 0, os.path.abspath("../..") + 0, os.path.abspath("../../..") ) # Adds the parent directory to the system path import litellm import pytest @@ -16,6 +16,7 @@ from dotenv import load_dotenv from litellm.llms.anthropic.experimental_pass_through.messages.handler import ( anthropic_messages, ) + from typing import Optional from litellm.types.utils import StandardLoggingPayload from litellm.integrations.custom_logger import CustomLogger @@ -73,6 +74,7 @@ async def test_anthropic_messages_non_streaming(): """ Test the anthropic_messages with non-streaming request """ + litellm._turn_on_debug() # Get API key from environment api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: @@ -82,7 +84,7 @@ async def test_anthropic_messages_non_streaming(): messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}] # Call the handler - response = await anthropic_messages( + response = await litellm.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", @@ -114,7 +116,7 @@ async def test_anthropic_messages_streaming(): # Call the handler async_httpx_client = AsyncHTTPHandler() - response = await anthropic_messages( + response = await litellm.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", @@ -134,7 +136,7 @@ async def test_anthropic_messages_streaming_with_bad_request(): Test the anthropic_messages with streaming request """ try: - response = await anthropic_messages( + response = await litellm.messages.acreate( messages=["hi"], api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-haiku-20240307", @@ -458,7 +460,7 @@ async def test_anthropic_messages_with_extra_headers(): mock_client.post = AsyncMock(return_value=mock_response) # Call the handler with extra_headers and our mocked client - response = await anthropic_messages( + response = await litellm.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", From 8499a88e4a45dcd887c027b143dbea892a2480dd Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Mar 2025 17:45:47 -0700 Subject: [PATCH 09/15] fixes - anthropic messages interface --- .../anthropic/experimental_pass_through/messages/handler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py index fd7a3d60a8..8b4c947a7c 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py +++ 
b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py @@ -141,8 +141,6 @@ async def anthropic_messages( }, custom_llm_provider=_custom_llm_provider, ) - litellm_logging_obj.model_call_details.update(kwargs) - # Prepare request body request_body = locals().copy() request_body = { @@ -157,6 +155,7 @@ async def anthropic_messages( request_body["stream"] = stream request_body["model"] = model litellm_logging_obj.stream = stream + litellm_logging_obj.model_call_details.update(request_body) # Make the request request_url = anthropic_messages_provider_config.get_complete_url( From bd39a395f1eb33f0398bb74aa2e3f263908c65e2 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 14:31:09 -0700 Subject: [PATCH 10/15] use new anthropic interface --- litellm/__init__.py | 2 +- litellm/anthropic/__init__.py | 4 ++++ litellm/{ => anthropic}/messages/__init__.py | 0 .../test_anthropic_messages_passthrough.py | 8 ++++---- 4 files changed, 9 insertions(+), 5 deletions(-) create mode 100644 litellm/anthropic/__init__.py rename litellm/{ => anthropic}/messages/__init__.py (100%) diff --git a/litellm/__init__.py b/litellm/__init__.py index a62322cef9..4db5bff11b 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1026,7 +1026,7 @@ from .proxy.proxy_cli import run_server from .router import Router from .assistants.main import * from .batches.main import * -from .messages import * +from .anthropic import * from .batch_completion.main import * # type: ignore from .rerank_api.main import * from .llms.anthropic.experimental_pass_through.messages.handler import * diff --git a/litellm/anthropic/__init__.py b/litellm/anthropic/__init__.py new file mode 100644 index 0000000000..c56a530c3c --- /dev/null +++ b/litellm/anthropic/__init__.py @@ -0,0 +1,4 @@ +""" +Anthropic module for LiteLLM +""" +from .messages import acreate, create diff --git a/litellm/messages/__init__.py b/litellm/anthropic/messages/__init__.py similarity index 100% rename from litellm/messages/__init__.py rename to litellm/anthropic/messages/__init__.py diff --git a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py index cb7ac0d3bf..ec268b1a24 100644 --- a/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py +++ b/tests/pass_through_unit_tests/test_anthropic_messages_passthrough.py @@ -84,7 +84,7 @@ async def test_anthropic_messages_non_streaming(): messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}] # Call the handler - response = await litellm.messages.acreate( + response = await litellm.anthropic.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", @@ -116,7 +116,7 @@ async def test_anthropic_messages_streaming(): # Call the handler async_httpx_client = AsyncHTTPHandler() - response = await litellm.messages.acreate( + response = await litellm.anthropic.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", @@ -136,7 +136,7 @@ async def test_anthropic_messages_streaming_with_bad_request(): Test the anthropic_messages with streaming request """ try: - response = await litellm.messages.acreate( + response = await litellm.anthropic.messages.acreate( messages=["hi"], api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-haiku-20240307", @@ -460,7 +460,7 @@ async def test_anthropic_messages_with_extra_headers(): mock_client.post = AsyncMock(return_value=mock_response) # Call the handler with extra_headers and our 
mocked client - response = await litellm.messages.acreate( + response = await litellm.anthropic.messages.acreate( messages=messages, api_key=api_key, model="claude-3-haiku-20240307", From cfab8eec5b0dfdd464040467f177b4947823352e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 14:44:12 -0700 Subject: [PATCH 11/15] fix code quality check --- litellm/anthropic/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/anthropic/__init__.py b/litellm/anthropic/__init__.py index c56a530c3c..9902fdc553 100644 --- a/litellm/anthropic/__init__.py +++ b/litellm/anthropic/__init__.py @@ -2,3 +2,5 @@ Anthropic module for LiteLLM """ from .messages import acreate, create + +__all__ = ["acreate", "create"] From b8c0526b98bd21d4d810acdc322c351e7f12ea75 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 15:28:40 -0700 Subject: [PATCH 12/15] docs anthropic messages endpoint --- docs/my-website/docs/anthropic_unified.md | 271 +++++++++++++++++++--- 1 file changed, 240 insertions(+), 31 deletions(-) diff --git a/docs/my-website/docs/anthropic_unified.md b/docs/my-website/docs/anthropic_unified.md index cf6ba798d5..485571aa28 100644 --- a/docs/my-website/docs/anthropic_unified.md +++ b/docs/my-website/docs/anthropic_unified.md @@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem'; # /v1/messages [BETA] -LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint. +Use LiteLLM to call all your LLM APIs in the Anthropic `v1/messages` format. -This currently just supports the Anthropic API. + +## Overview | Feature | Supported | Notes | |-------|-------|-------| @@ -21,9 +22,61 @@ Planned improvement: - Bedrock Anthropic support ## Usage +--- + +### LiteLLM Python SDK + +#### Non-streaming example +```python showLineNumbers title="Example using LiteLLM Python SDK" +import litellm +response = await litellm.anthropic.messages.acreate( + messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}], + api_key=api_key, + model="anthropic/claude-3-haiku-20240307", + max_tokens=100, +) +``` + +Example response: +```json +{ + "content": [ + { + "text": "Hi! this is a very short joke", + "type": "text" + } + ], + "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF", + "model": "claude-3-7-sonnet-20250219", + "role": "assistant", + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "input_tokens": 2095, + "output_tokens": 503, + "cache_creation_input_tokens": 2095, + "cache_read_input_tokens": 0 + } +} +``` + +#### Streaming example +```python showLineNumbers title="Example using LiteLLM Python SDK" +import litellm +response = await litellm.anthropic.messages.acreate( + messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}], + api_key=api_key, + model="anthropic/claude-3-haiku-20240307", + max_tokens=100, + stream=True, +) +async for chunk in response: + print(chunk) +``` + +### LiteLLM Proxy Server - - 1. Setup config.yaml @@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml 3. Test it! 
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic

# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

response = client.messages.create(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    model="anthropic-claude",
    max_tokens=100,
)
```

```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
-H 'anthropic-version: 2023-06-01' \
-d '{
  "model": "anthropic-claude",
  "messages": [
    {
      "role": "user",
-      "content": [
-        {
-          "type": "text",
-          "text": "List 5 important events in the XIX century"
-        }
-      ]
+      "content": "Hello, can you tell me a short joke?"
    }
  ],
-  "max_tokens": 4096
+  "max_tokens": 100
}'
```

-```python
-from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages
-import asyncio
-import os
-# set env
-os.environ["ANTHROPIC_API_KEY"] = "my-api-key"
-
-messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]
-
-# Call the handler
-async def call():
-    response = await anthropic_messages(
-        messages=messages,
-        api_key=api_key,
-        model="claude-3-haiku-20240307",
-        max_tokens=100,
-    )
-
-asyncio.run(call())
-```

## Request Format
---

Request body will be in the Anthropic messages API format. **litellm follows the Anthropic messages specification for this endpoint.**

#### Example request body

```json
{
  "model": "claude-3-7-sonnet-20250219",
  "max_tokens": 1024,
  "messages": [
    {
      "role": "user",
      "content": "Hello, world"
    }
  ]
}
```

#### Required Fields
- **model** (string):
  The model identifier (e.g., `"claude-3-7-sonnet-20250219"`).
- **max_tokens** (integer):
  The maximum number of tokens to generate before stopping.
  _Note: The model may stop before reaching this limit; value must be greater than 1._
- **messages** (array of objects):
  An ordered list of conversational turns.
  Each message object must include:
  - **role** (enum: `"user"` or `"assistant"`):
    Specifies the speaker of the message.
  - **content** (string or array of content blocks):
    The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message.
    _Example equivalence:_
    ```json
    {"role": "user", "content": "Hello, Claude"}
    ```
    is equivalent to:
    ```json
    {"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]}
    ```

#### Optional Fields
- **metadata** (object):
  Contains additional metadata about the request (e.g., `user_id` as an opaque identifier).
- **stop_sequences** (array of strings):
  Custom sequences that, when encountered in the generated text, cause the model to stop.
- **stream** (boolean):
  Indicates whether to stream the response using server-sent events.
- **system** (string or array):
  A system prompt providing context or specific instructions to the model.
- **temperature** (number):
  Controls randomness in the model’s responses. Valid range: `0 < temperature < 1`.
- **thinking** (object):
  Configuration for enabling extended thinking. If enabled, it includes:
  - **budget_tokens** (integer):
    Minimum of 1024 tokens (and less than `max_tokens`).
  - **type** (enum):
    E.g., `"enabled"`.
- **tool_choice** (object):
  Instructs how the model should utilize any provided tools.
+- **tools** (array of objects): + Definitions for tools available to the model. Each tool includes: + - **name** (string): + The tool’s name. + - **description** (string): + A detailed description of the tool. + - **input_schema** (object): + A JSON schema describing the expected input format for the tool. +- **top_k** (integer): + Limits sampling to the top K options. +- **top_p** (number): + Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`. + + +## Response Format +--- + +Responses will be in the Anthropic messages API format. + +#### Example Response + +```json +{ + "content": [ + { + "text": "Hi! My name is Claude.", + "type": "text" + } + ], + "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF", + "model": "claude-3-7-sonnet-20250219", + "role": "assistant", + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "input_tokens": 2095, + "output_tokens": 503, + "cache_creation_input_tokens": 2095, + "cache_read_input_tokens": 0 + } +} +``` + +#### Response fields + +- **content** (array of objects): + Contains the generated content blocks from the model. Each block includes: + - **type** (string): + Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`). + - **text** (string): + The generated text from the model. + _Note: Maximum length is 5,000,000 characters._ + - **citations** (array of objects or `null`): + Optional field providing citation details. Each citation includes: + - **cited_text** (string): + The excerpt being cited. + - **document_index** (integer): + An index referencing the cited document. + - **document_title** (string or `null`): + The title of the cited document. + - **start_char_index** (integer): + The starting character index for the citation. + - **end_char_index** (integer): + The ending character index for the citation. + - **type** (string): + Typically `"char_location"`. + +- **id** (string): + A unique identifier for the response message. + _Note: The format and length of IDs may change over time._ + +- **model** (string): + Specifies the model that generated the response. + +- **role** (string): + Indicates the role of the generated message. For responses, this is always `"assistant"`. + +- **stop_reason** (string): + Explains why the model stopped generating text. Possible values include: + - `"end_turn"`: The model reached a natural stopping point. + - `"max_tokens"`: The generation stopped because the maximum token limit was reached. + - `"stop_sequence"`: A custom stop sequence was encountered. + - `"tool_use"`: The model invoked one or more tools. + +- **stop_sequence** (string or `null`): + Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`. + +- **type** (string): + Denotes the type of response object, which is always `"message"`. + +- **usage** (object): + Provides details on token usage for billing and rate limiting. This includes: + - **input_tokens** (integer): + Total number of input tokens processed. + - **output_tokens** (integer): + Total number of output tokens generated. + - **cache_creation_input_tokens** (integer or `null`): + Number of tokens used to create a cache entry. + - **cache_read_input_tokens** (integer or `null`): + Number of tokens read from the cache. 
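Taken together, patches 01-12 wire a typed return through the new interface: `litellm.anthropic.messages.acreate` returns `Union[AnthropicMessagesResponse, AsyncIterator]`, and every key on the TypedDict is optional (`total=False`). Below is a minimal sketch of consuming the non-streaming arm. It is not part of the diff; it assumes a valid `ANTHROPIC_API_KEY` in the environment, and the `cast` is purely illustrative, since with the default `stream=False` only the TypedDict arm is expected.

```python
import asyncio
import os
from typing import cast

import litellm
from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)


async def main() -> None:
    # With stream=False (the default) only the TypedDict arm of the union is
    # expected, so a cast narrows the return type for mypy.
    response = cast(
        AnthropicMessagesResponse,
        await litellm.anthropic.messages.acreate(
            messages=[{"role": "user", "content": "Tell me a short joke."}],
            api_key=os.environ["ANTHROPIC_API_KEY"],  # assumed to be set
            model="claude-3-haiku-20240307",
            max_tokens=100,
        ),
    )

    # total=False means no key is guaranteed, so .get() with a fallback is
    # the safe accessor throughout.
    for block in response.get("content") or []:
        if block.get("type") == "text":
            print(block.get("text"))

    usage = response.get("usage") or {}
    print("input tokens:", usage.get("input_tokens"))


asyncio.run(main())
```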
From 43db2011cc460c76c5ade138caf24cf5b1935f8b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 15:36:15 -0700 Subject: [PATCH 13/15] add namespace_packages = True to mypy --- mypy.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/mypy.ini b/mypy.ini index 19ead3ba7d..3ce8c5fcc0 100644 --- a/mypy.ini +++ b/mypy.ini @@ -2,6 +2,7 @@ warn_return_any = False ignore_missing_imports = True mypy_path = litellm/stubs +namespace_packages = True [mypy-google.*] ignore_missing_imports = True From ceab3cfa7ce6e9856fbf0b02dbfa4aefb2b90530 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 16:52:39 -0700 Subject: [PATCH 14/15] fix mypy lint errors --- litellm/__init__.py | 2 +- litellm/{anthropic => anthropic_interface}/__init__.py | 0 .../messages/__init__.py | 0 litellm/anthropic_interface/readme.md | 9 +++++++++ 4 files changed, 10 insertions(+), 1 deletion(-) rename litellm/{anthropic => anthropic_interface}/__init__.py (100%) rename litellm/{anthropic => anthropic_interface}/messages/__init__.py (100%) create mode 100644 litellm/anthropic_interface/readme.md diff --git a/litellm/__init__.py b/litellm/__init__.py index 4db5bff11b..9997b9a8ac 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1026,7 +1026,6 @@ from .proxy.proxy_cli import run_server from .router import Router from .assistants.main import * from .batches.main import * -from .anthropic import * from .batch_completion.main import * # type: ignore from .rerank_api.main import * from .llms.anthropic.experimental_pass_through.messages.handler import * @@ -1039,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token ### ADAPTERS ### from .types.adapter import AdapterItem +import litellm.anthropic_interface as anthropic adapters: List[AdapterItem] = [] diff --git a/litellm/anthropic/__init__.py b/litellm/anthropic_interface/__init__.py similarity index 100% rename from litellm/anthropic/__init__.py rename to litellm/anthropic_interface/__init__.py diff --git a/litellm/anthropic/messages/__init__.py b/litellm/anthropic_interface/messages/__init__.py similarity index 100% rename from litellm/anthropic/messages/__init__.py rename to litellm/anthropic_interface/messages/__init__.py diff --git a/litellm/anthropic_interface/readme.md b/litellm/anthropic_interface/readme.md new file mode 100644 index 0000000000..69099dd634 --- /dev/null +++ b/litellm/anthropic_interface/readme.md @@ -0,0 +1,9 @@ +## Use LLM API endpoints in Anthropic Interface + +Note: This is called `anthropic_interface` because `anthropic` is a known python package and was failing mypy type checking. + +### Usage + +```python +import litellm.anthropic_interface as anthropic +``` \ No newline at end of file From d91bf3cc91c388de4a72dfd448b937c3b44c613d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 31 Mar 2025 16:53:39 -0700 Subject: [PATCH 15/15] docs anthropic messages interface --- litellm/anthropic_interface/readme.md | 113 +++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 3 deletions(-) diff --git a/litellm/anthropic_interface/readme.md b/litellm/anthropic_interface/readme.md index 69099dd634..01c5f1b7c3 100644 --- a/litellm/anthropic_interface/readme.md +++ b/litellm/anthropic_interface/readme.md @@ -2,8 +2,115 @@ Note: This is called `anthropic_interface` because `anthropic` is a known python package and was failing mypy type checking. 
## Usage
---

### LiteLLM Python SDK

#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
)
```

Example response:
```json
{
  "content": [
    {
      "text": "Hi! this is a very short joke",
      "type": "text"
    }
  ],
  "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
  "model": "claude-3-7-sonnet-20250219",
  "role": "assistant",
  "stop_reason": "end_turn",
  "stop_sequence": null,
  "type": "message",
  "usage": {
    "input_tokens": 2095,
    "output_tokens": 503,
    "cache_creation_input_tokens": 2095,
    "cache_read_input_tokens": 0
  }
}
```

#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    api_key=api_key,
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=100,
    stream=True,
)
async for chunk in response:
    print(chunk)
```

### LiteLLM Proxy Server

1. Setup config.yaml

```yaml
model_list:
  - model_name: anthropic-claude
    litellm_params:
      model: claude-3-7-sonnet-latest
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic

# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234",
)

response = client.messages.create(
    messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
    model="anthropic-claude",
    max_tokens=100,
)
```

```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
-H 'anthropic-version: 2023-06-01' \
-d '{
  "model": "anthropic-claude",
  "messages": [
    {
      "role": "user",
      "content": "Hello, can you tell me a short joke?"
    }
  ],
  "max_tokens": 100
}'
```
\ No newline at end of file
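The streaming arm of that same union stays untyped: with `stream=True` the call resolves to a bare `AsyncIterator`, so chunk contents are whatever the provider streams back. The sketch below (again, not part of the diff, and under the same `ANTHROPIC_API_KEY` assumption) guards the union at the call site.

```python
import asyncio
import os
from collections.abc import AsyncIterator

import litellm


async def main() -> None:
    response = await litellm.anthropic.messages.acreate(
        messages=[{"role": "user", "content": "Tell me a short joke."}],
        api_key=os.environ["ANTHROPIC_API_KEY"],  # assumed to be set
        model="claude-3-haiku-20240307",
        max_tokens=100,
        stream=True,
    )

    # isinstance narrows the Union[AnthropicMessagesResponse, AsyncIterator]
    # return and guards against a non-streaming payload sneaking through.
    if isinstance(response, AsyncIterator):
        async for chunk in response:
            print(chunk)  # raw chunk; its shape is not typed by this series


asyncio.run(main())
```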