mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
Anthropic Citations API Support (#8382)
* test(test_anthropic_completion.py): add test ensuring anthropic structured output response is consistent Resolves https://github.com/BerriAI/litellm/issues/8291 * feat(anthropic.py): support citations api with new user document message format Resolves https://github.com/BerriAI/litellm/issues/7970 * fix(anthropic/chat/transformation.py): return citations as a provider-specific-field Resolves https://github.com/BerriAI/litellm/issues/7970 * feat(anthropic/chat/handler.py): add streaming citations support Resolves https://github.com/BerriAI/litellm/issues/7970 * fix(handler.py): fix code qa error * fix(handler.py): only set provider specific fields if non-empty dict * docs(anthropic.md): add citations api to anthropic docs
This commit is contained in:
parent
8a8e8f6cdd
commit
7759e86cf5
9 changed files with 308 additions and 21 deletions
|
@ -987,6 +987,106 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
|||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## [BETA] Citations API
|
||||
|
||||
Pass `citations: {"enabled": true}` to Anthropic to get citations on your document responses.
|
||||
|
||||
Note: This interface is in BETA. If you have feedback on how citations should be returned, please [tell us here](https://github.com/BerriAI/litellm/issues/7970#issuecomment-2644437943)
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
|
||||
resp = completion(
|
||||
model="claude-3-5-sonnet-20241022",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "document",
|
||||
"source": {
|
||||
"type": "text",
|
||||
"media_type": "text/plain",
|
||||
"data": "The grass is green. The sky is blue.",
|
||||
},
|
||||
"title": "My Document",
|
||||
"context": "This is a trustworthy document.",
|
||||
"citations": {"enabled": True},
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What color is the grass and sky?",
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
citations = resp.choices[0].message.provider_specific_fields["citations"]
|
||||
|
||||
assert citations is not None
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="proxy" label="PROXY">
|
||||
|
||||
1. Setup config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: anthropic-claude
|
||||
litellm_params:
|
||||
model: anthropic/claude-3-5-sonnet-20241022
|
||||
api_key: os.environ/ANTHROPIC_API_KEY
|
||||
```
|
||||
|
||||
2. Start proxy
|
||||
|
||||
```bash
|
||||
litellm --config /path/to/config.yaml
|
||||
|
||||
# RUNNING on http://0.0.0.0:4000
|
||||
```
|
||||
|
||||
3. Test it!
|
||||
|
||||
```bash
|
||||
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H 'Authorization: Bearer sk-1234' \
|
||||
-d '{
|
||||
"model": "anthropic-claude",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "document",
|
||||
"source": {
|
||||
"type": "text",
|
||||
"media_type": "text/plain",
|
||||
"data": "The grass is green. The sky is blue.",
|
||||
},
|
||||
"title": "My Document",
|
||||
"context": "This is a trustworthy document.",
|
||||
"citations": {"enabled": true},
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What color is the grass and sky?",
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Usage - passing 'user_id' to Anthropic
|
||||
|
||||
LiteLLM translates the OpenAI `user` param to Anthropic's `metadata[user_id]` param.
|
||||
|
|
|
@ -1421,6 +1421,8 @@ def anthropic_messages_pt( # noqa: PLR0915
|
|||
)
|
||||
|
||||
user_content.append(_content_element)
|
||||
elif m.get("type", "") == "document":
|
||||
user_content.append(cast(AnthropicMessagesDocumentParam, m))
|
||||
elif isinstance(user_message_types_block["content"], str):
|
||||
_anthropic_content_text_element: AnthropicMessagesTextParam = {
|
||||
"type": "text",
|
||||
|
|
|
@ -809,7 +809,10 @@ class CustomStreamWrapper:
|
|||
if self.sent_first_chunk is False:
|
||||
completion_obj["role"] = "assistant"
|
||||
self.sent_first_chunk = True
|
||||
|
||||
if response_obj.get("provider_specific_fields") is not None:
|
||||
completion_obj["provider_specific_fields"] = response_obj[
|
||||
"provider_specific_fields"
|
||||
]
|
||||
model_response.choices[0].delta = Delta(**completion_obj)
|
||||
_index: Optional[int] = completion_obj.get("index")
|
||||
if _index is not None:
|
||||
|
|
|
@ -4,7 +4,7 @@ Calling + translation logic for anthropic's `/v1/messages` endpoint
|
|||
|
||||
import copy
|
||||
import json
|
||||
from typing import Any, Callable, List, Optional, Tuple, Union
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import httpx # type: ignore
|
||||
|
||||
|
@ -506,6 +506,29 @@ class ModelResponseIterator:
|
|||
|
||||
return usage_block
|
||||
|
||||
def _content_block_delta_helper(self, chunk: dict):
    """Parse a `content_block_delta` event into (text, tool_use, provider_specific_fields).

    At most one of the three outputs is populated, depending on which key
    the delta carries: `text` (plain text fragment), `partial_json`
    (streamed tool-call arguments), or `citation` (surfaced to callers as
    a provider-specific field).
    """
    parsed_text = ""
    parsed_tool_use: Optional[ChatCompletionToolCallChunk] = None
    extra_fields = {}

    block = ContentBlockDelta(**chunk)  # type: ignore
    # keep a record of every delta seen, in order
    self.content_blocks.append(block)
    delta = block["delta"]

    if "text" in delta:
        parsed_text = delta["text"]
    elif "partial_json" in delta:
        # streamed fragment of a tool call's JSON arguments
        parsed_tool_use = {
            "id": None,
            "type": "function",
            "function": {
                "name": None,
                "arguments": delta["partial_json"],
            },
            "index": self.tool_index,
        }
    elif "citation" in delta:
        extra_fields["citation"] = delta["citation"]

    return parsed_text, parsed_tool_use, extra_fields
||||
|
||||
def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
|
||||
try:
|
||||
type_chunk = chunk.get("type", "") or ""
|
||||
|
@ -515,6 +538,7 @@ class ModelResponseIterator:
|
|||
is_finished = False
|
||||
finish_reason = ""
|
||||
usage: Optional[ChatCompletionUsageBlock] = None
|
||||
provider_specific_fields: Dict[str, Any] = {}
|
||||
|
||||
index = int(chunk.get("index", 0))
|
||||
if type_chunk == "content_block_delta":
|
||||
|
@ -522,20 +546,9 @@ class ModelResponseIterator:
|
|||
Anthropic content chunk
|
||||
chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
|
||||
"""
|
||||
content_block = ContentBlockDelta(**chunk) # type: ignore
|
||||
self.content_blocks.append(content_block)
|
||||
if "text" in content_block["delta"]:
|
||||
text = content_block["delta"]["text"]
|
||||
elif "partial_json" in content_block["delta"]:
|
||||
tool_use = {
|
||||
"id": None,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": None,
|
||||
"arguments": content_block["delta"]["partial_json"],
|
||||
},
|
||||
"index": self.tool_index,
|
||||
}
|
||||
text, tool_use, provider_specific_fields = (
|
||||
self._content_block_delta_helper(chunk=chunk)
|
||||
)
|
||||
elif type_chunk == "content_block_start":
|
||||
"""
|
||||
event: content_block_start
|
||||
|
@ -628,6 +641,9 @@ class ModelResponseIterator:
|
|||
finish_reason=finish_reason,
|
||||
usage=usage,
|
||||
index=index,
|
||||
provider_specific_fields=(
|
||||
provider_specific_fields if provider_specific_fields else None
|
||||
),
|
||||
)
|
||||
|
||||
return returned_chunk
|
||||
|
|
|
@ -628,6 +628,7 @@ class AnthropicConfig(BaseConfig):
|
|||
)
|
||||
else:
|
||||
text_content = ""
|
||||
citations: List[Any] = []
|
||||
tool_calls: List[ChatCompletionToolCallChunk] = []
|
||||
for idx, content in enumerate(completion_response["content"]):
|
||||
if content["type"] == "text":
|
||||
|
@ -645,10 +646,14 @@ class AnthropicConfig(BaseConfig):
|
|||
index=idx,
|
||||
)
|
||||
)
|
||||
## CITATIONS
|
||||
if content.get("citations", None) is not None:
|
||||
citations.append(content["citations"])
|
||||
|
||||
_message = litellm.Message(
|
||||
tool_calls=tool_calls,
|
||||
content=text_content or None,
|
||||
provider_specific_fields={"citations": citations},
|
||||
)
|
||||
|
||||
## HANDLE JSON MODE - anthropic returns single function call
|
||||
|
|
|
@ -92,10 +92,17 @@ class AnthropicMessagesImageParam(TypedDict, total=False):
|
|||
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
|
||||
|
||||
|
||||
class CitationsObject(TypedDict):
    """Citations toggle for an Anthropic document content block."""

    enabled: bool  # when True, Anthropic returns citations for this document
||||
|
||||
|
||||
class AnthropicMessagesDocumentParam(TypedDict, total=False):
    """User-message 'document' content block for Anthropic's Messages API.

    Only `type` and `source` are required; all other keys are optional
    (total=False).
    """

    type: Required[Literal["document"]]
    source: Required[AnthropicContentParamSource]
    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]  # prompt-caching hint
    title: str  # display title of the document
    context: str  # extra context for the model to consider with the document
    citations: Optional[CitationsObject]  # enable citation responses for this document
|
||||
|
||||
|
||||
class AnthropicMessagesToolResultContent(TypedDict):
|
||||
|
@ -173,6 +180,11 @@ class ContentTextBlockDelta(TypedDict):
|
|||
text: str
|
||||
|
||||
|
||||
class ContentCitationsBlockDelta(TypedDict):
    """Streaming delta carrying a citation for the current content block."""

    type: Literal["citations"]
    citation: dict  # raw citation payload as returned by Anthropic
|
||||
|
||||
|
||||
class ContentJsonBlockDelta(TypedDict):
|
||||
"""
|
||||
"delta": {"type": "input_json_delta","partial_json": "{\"location\": \"San Fra"}}
|
||||
|
@ -185,7 +197,9 @@ class ContentJsonBlockDelta(TypedDict):
|
|||
class ContentBlockDelta(TypedDict):
    """A `content_block_delta` streaming event from Anthropic's Messages API.

    `delta` carries exactly one payload variant: a text fragment, a partial
    JSON fragment of a tool call's arguments, or a citation for the block.
    """

    type: Literal["content_block_delta"]
    index: int  # which content block in the response this delta applies to
    # Fix: the merged diff left both the old single-line `delta` annotation
    # and the new one — a class may declare the field only once. Keep the
    # updated union, which adds ContentCitationsBlockDelta.
    delta: Union[
        ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta
    ]
|
||||
|
||||
|
||||
class ContentBlockStop(TypedDict):
|
||||
|
|
|
@ -382,10 +382,29 @@ class ChatCompletionAudioObject(ChatCompletionContentPartInputAudioParam):
|
|||
pass
|
||||
|
||||
|
||||
class DocumentObject(TypedDict):
    """Source payload for a 'document' content block (plain-text documents)."""

    type: Literal["text"]
    media_type: str  # e.g. "text/plain"
    data: str  # the raw document text
||||
|
||||
|
||||
class CitationsObject(TypedDict):
    """Citations toggle for a document content block."""

    enabled: bool  # when True, the provider returns citations for this document
||||
|
||||
|
||||
class ChatCompletionDocumentObject(TypedDict):
    """OpenAI-format 'document' content block (used for the Anthropic citations API)."""

    type: Literal["document"]
    source: DocumentObject  # the document contents
    title: str  # display title of the document
    context: str  # extra context for the model to consider with the document
    citations: Optional[CitationsObject]  # enable citation responses for this document
||||
|
||||
|
||||
# All content-block types accepted inside a list-form message `content`.
OpenAIMessageContentListBlock = Union[
    ChatCompletionTextObject,
    ChatCompletionImageObject,
    ChatCompletionAudioObject,
    ChatCompletionDocumentObject,
]
||||
|
||||
OpenAIMessageContent = Union[
|
||||
|
@ -460,6 +479,7 @@ ValidUserMessageContentTypes = [
|
|||
"text",
|
||||
"image_url",
|
||||
"input_audio",
|
||||
"document",
|
||||
] # used for validating user messages. Prevent users from accidentally sending anthropic messages.
|
||||
|
||||
AllMessageValues = Union[
|
||||
|
|
|
@ -551,6 +551,7 @@ class Delta(OpenAIObject):
|
|||
):
|
||||
super(Delta, self).__init__(**params)
|
||||
provider_specific_fields: Dict[str, Any] = {}
|
||||
|
||||
if "reasoning_content" in params:
|
||||
provider_specific_fields["reasoning_content"] = params["reasoning_content"]
|
||||
setattr(self, "reasoning_content", params["reasoning_content"])
|
||||
|
|
|
@ -1022,10 +1022,26 @@ def test_anthropic_json_mode_and_tool_call_response(
|
|||
[
|
||||
("stop", ["stop"], True), # basic string
|
||||
(["stop1", "stop2"], ["stop1", "stop2"], True), # list of strings
|
||||
(" ", None, True), # whitespace string should be dropped when drop_params is True
|
||||
(" ", [" "], False), # whitespace string should be kept when drop_params is False
|
||||
(["stop1", " ", "stop2"], ["stop1", "stop2"], True), # list with whitespace that should be filtered
|
||||
(["stop1", " ", "stop2"], ["stop1", " ", "stop2"], False), # list with whitespace that should be kept
|
||||
(
|
||||
" ",
|
||||
None,
|
||||
True,
|
||||
), # whitespace string should be dropped when drop_params is True
|
||||
(
|
||||
" ",
|
||||
[" "],
|
||||
False,
|
||||
), # whitespace string should be kept when drop_params is False
|
||||
(
|
||||
["stop1", " ", "stop2"],
|
||||
["stop1", "stop2"],
|
||||
True,
|
||||
), # list with whitespace that should be filtered
|
||||
(
|
||||
["stop1", " ", "stop2"],
|
||||
["stop1", " ", "stop2"],
|
||||
False,
|
||||
), # list with whitespace that should be kept
|
||||
(None, None, True), # None input
|
||||
],
|
||||
)
|
||||
|
@ -1035,3 +1051,113 @@ def test_map_stop_sequences(stop_input, expected_output, drop_params):
|
|||
config = AnthropicConfig()
|
||||
result = config._map_stop_sequences(stop_input)
|
||||
assert result == expected_output
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_anthropic_structured_output():
    """
    Test the _transform_response_for_structured_output

    Relevant Issue: https://github.com/BerriAI/litellm/issues/8291
    """
    from litellm import acompletion

    system_prompt = 'You are a hello world agent.\nAlways respond in the following valid JSON format: {\n "response": "response",\n}\n'

    response = await acompletion(
        model="claude-3-5-sonnet-20240620",
        seed=3015206306868917280,
        stop=None,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": "Respond with hello world"},
        ],
        temperature=0,
        response_format={"type": "json_object"},
        drop_params=True,
    )
    assert response is not None

    print(response)
|
||||
|
||||
|
||||
def test_anthropic_citations_api():
    """
    Test the citations API
    """
    from litellm import completion

    document_block = {
        "type": "document",
        "source": {
            "type": "text",
            "media_type": "text/plain",
            "data": "The grass is green. The sky is blue.",
        },
        "title": "My Document",
        "context": "This is a trustworthy document.",
        "citations": {"enabled": True},
    }
    question_block = {
        "type": "text",
        "text": "What color is the grass and sky?",
    }

    resp = completion(
        model="claude-3-5-sonnet-20241022",
        messages=[{"role": "user", "content": [document_block, question_block]}],
    )

    # citations are surfaced as a provider-specific field on the message
    citations = resp.choices[0].message.provider_specific_fields["citations"]

    assert citations is not None
|
||||
|
||||
|
||||
def test_anthropic_citations_api_streaming():
    """Streamed citations arrive per-chunk under delta.provider_specific_fields."""
    from litellm import completion

    document_block = {
        "type": "document",
        "source": {
            "type": "text",
            "media_type": "text/plain",
            "data": "The grass is green. The sky is blue.",
        },
        "title": "My Document",
        "context": "This is a trustworthy document.",
        "citations": {"enabled": True},
    }
    question_block = {
        "type": "text",
        "text": "What color is the grass and sky?",
    }

    resp = completion(
        model="claude-3-5-sonnet-20241022",
        messages=[{"role": "user", "content": [document_block, question_block]}],
        stream=True,
    )

    has_citations = False
    for chunk in resp:
        print(f"returned chunk: {chunk}")
        fields = chunk.choices[0].delta.provider_specific_fields
        if fields and "citation" in fields:
            has_citations = True

    assert has_citations
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue