feat(vertex_ai_anthropic.py): support response_schema for vertex ai anthropic calls

allows passing response_schema for anthropic calls routed through vertex ai: the schema is converted into a forced `json_tool_call` tool, the tool's output is unwrapped back into plain JSON content, and the result can be validated against the schema.
Krrish Dholakia 2024-07-18 16:57:38 -07:00
parent f8bdfe7cc3
commit f2401d6d5e
6 changed files with 189 additions and 48 deletions
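In practice the new path is exercised roughly like this; a minimal sketch, assuming the response_format keys used by the test at the bottom of this commit (the schema and model name are illustrative placeholders):

    import litellm

    response_schema = {
        "type": "array",
        "items": {
            "type": "object",
            "properties": {"recipe_name": {"type": "string"}},
            "required": ["recipe_name"],
        },
    }

    resp = litellm.completion(
        model="vertex_ai/claude-3-5-sonnet@20240620",
        messages=[{"role": "user", "content": "List 5 cookie recipes"}],
        response_format={
            "type": "json_object",
            "response_schema": response_schema,
            "enforce_validation": True,  # raise JSONSchemaValidationError on mismatch
        },
        vertex_location="us-east5",
    )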


@@ -13,7 +13,12 @@ def validate_schema(schema: dict, response: str):
     from litellm import JSONSchemaValidationError
 
-    response_dict = json.loads(response)
+    try:
+        response_dict = json.loads(response)
+    except json.JSONDecodeError:
+        raise JSONSchemaValidationError(
+            model="", llm_provider="", raw_response=response, schema=response
+        )
 
     try:
         validate(response_dict, schema=schema)
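A quick sketch of the validator's new behavior (assuming the module path litellm/litellm_core_utils/json_validation_rule.py implied by this hunk's context):

    from litellm import JSONSchemaValidationError
    from litellm.litellm_core_utils.json_validation_rule import validate_schema

    schema = {"type": "object", "properties": {"name": {"type": "string"}}}

    validate_schema(schema=schema, response='{"name": "Claude"}')  # passes

    try:
        validate_schema(schema=schema, response="not json at all")
    except JSONSchemaValidationError:
        # previously json.loads() raised a bare JSONDecodeError here;
        # this commit wraps it in litellm's own exception type
        print("raised JSONSchemaValidationError")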


@@ -16,6 +16,7 @@ from litellm import verbose_logger
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
+    HTTPHandler,
     _get_async_httpx_client,
     _get_httpx_client,
 )
@@ -538,7 +539,7 @@ class AnthropicChatCompletion(BaseLLM):
     def __init__(self) -> None:
         super().__init__()
 
-    def process_response(
+    def _process_response(
         self,
         model: str,
         response: Union[requests.Response, httpx.Response],
@@ -551,6 +552,7 @@ class AnthropicChatCompletion(BaseLLM):
         messages: List,
         print_verbose,
         encoding,
+        json_mode: bool,
     ) -> ModelResponse:
         ## LOGGING
         logging_obj.post_call(
@@ -574,27 +576,40 @@ class AnthropicChatCompletion(BaseLLM):
             )
         else:
             text_content = ""
-            tool_calls = []
-            for content in completion_response["content"]:
+            tool_calls: List[ChatCompletionToolCallChunk] = []
+            for idx, content in enumerate(completion_response["content"]):
                 if content["type"] == "text":
                     text_content += content["text"]
                 ## TOOL CALLING
                 elif content["type"] == "tool_use":
                     tool_calls.append(
-                        {
-                            "id": content["id"],
-                            "type": "function",
-                            "function": {
-                                "name": content["name"],
-                                "arguments": json.dumps(content["input"]),
-                            },
-                        }
+                        ChatCompletionToolCallChunk(
+                            id=content["id"],
+                            type="function",
+                            function=ChatCompletionToolCallFunctionChunk(
+                                name=content["name"],
+                                arguments=json.dumps(content["input"]),
+                            ),
+                            index=idx,
+                        )
                     )
             _message = litellm.Message(
                 tool_calls=tool_calls,
                 content=text_content or None,
             )
+
+            ## HANDLE JSON MODE - anthropic returns single function call
+            if json_mode and len(tool_calls) == 1:
+                json_mode_content_str: Optional[str] = tool_calls[0]["function"].get(
+                    "arguments"
+                )
+                if json_mode_content_str is not None:
+                    args = json.loads(json_mode_content_str)
+                    values: Optional[dict] = args.get("values")
+                    if values is not None:
+                        _message = litellm.Message(content=json.dumps(values))
+                        completion_response["stop_reason"] = "stop"
+
         model_response.choices[0].message = _message  # type: ignore
         model_response._hidden_params["original_response"] = completion_response[
             "content"
@@ -687,9 +702,11 @@
         _is_function_call,
         data: dict,
         optional_params: dict,
+        json_mode: bool,
         litellm_params=None,
         logger_fn=None,
         headers={},
+        client=None,
     ) -> Union[ModelResponse, CustomStreamWrapper]:
         async_handler = _get_async_httpx_client()
@@ -705,7 +722,7 @@
             )
             raise e
 
-        return self.process_response(
+        return self._process_response(
             model=model,
             response=response,
             model_response=model_response,
@@ -717,6 +734,7 @@
             print_verbose=print_verbose,
             optional_params=optional_params,
             encoding=encoding,
+            json_mode=json_mode,
         )
 
     def completion(
@@ -731,10 +749,12 @@
         api_key,
         logging_obj,
         optional_params: dict,
+        timeout: Union[float, httpx.Timeout],
         acompletion=None,
         litellm_params=None,
         logger_fn=None,
         headers={},
+        client=None,
     ):
         headers = validate_environment(api_key, headers, model)
         _is_function_call = False
@@ -787,14 +807,18 @@
             anthropic_tools = []
             for tool in optional_params["tools"]:
-                new_tool = tool["function"]
-                new_tool["input_schema"] = new_tool.pop("parameters")  # rename key
-                anthropic_tools.append(new_tool)
+                if "input_schema" in tool:  # assume in anthropic format
+                    anthropic_tools.append(tool)
+                else:  # assume openai tool call
+                    new_tool = tool["function"]
+                    new_tool["input_schema"] = new_tool.pop("parameters")  # rename key
+                    anthropic_tools.append(new_tool)
             optional_params["tools"] = anthropic_tools
 
         stream = optional_params.pop("stream", None)
         is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
+        json_mode: bool = optional_params.pop("json_mode", False)
 
         data = {
             "messages": messages,
@@ -815,7 +839,7 @@
             },
         )
         print_verbose(f"_is_function_call: {_is_function_call}")
-        if acompletion == True:
+        if acompletion is True:
             if (
                 stream is True
             ):  # if function call - fake the streaming (need complete blocks for output parsing in openai format)
@@ -857,15 +881,21 @@
                 litellm_params=litellm_params,
                 logger_fn=logger_fn,
                 headers=headers,
+                client=client,
+                json_mode=json_mode,
             )
         else:
             ## COMPLETION CALL
+            if client is None or isinstance(client, AsyncHTTPHandler):
+                client = HTTPHandler(timeout=timeout)  # type: ignore
+            else:
+                client = client
             if (
                 stream is True
             ):  # if function call - fake the streaming (need complete blocks for output parsing in openai format)
                 print_verbose("makes anthropic streaming POST request")
                 data["stream"] = stream
-                response = requests.post(
+                response = client.post(
                     api_base,
                     headers=headers,
                     data=json.dumps(data),
@@ -889,15 +919,13 @@
                 return streaming_response
 
             else:
-                response = requests.post(
-                    api_base, headers=headers, data=json.dumps(data)
-                )
+                response = client.post(api_base, headers=headers, data=json.dumps(data))
                 if response.status_code != 200:
                     raise AnthropicError(
                         status_code=response.status_code, message=response.text
                     )
 
-        return self.process_response(
+        return self._process_response(
             model=model,
             response=response,
             model_response=model_response,
@@ -909,6 +937,7 @@
             print_verbose=print_verbose,
             optional_params=optional_params,
             encoding=encoding,
+            json_mode=json_mode,
         )
 
     def embedding(self):
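With timeout and client threaded through, a caller can reuse one pooled synchronous handler across requests; a sketch, assuming litellm.completion forwards the client kwarg as wired up in main.py below (the Timeout values are arbitrary):

    import httpx
    import litellm
    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    # one pooled sync client, reused across completion calls
    client = HTTPHandler(timeout=httpx.Timeout(600.0, connect=5.0))

    resp = litellm.completion(
        model="vertex_ai/claude-3-5-sonnet@20240620",
        messages=[{"role": "user", "content": "hi"}],
        client=client,
        timeout=600.0,
    )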


@@ -7,7 +7,7 @@ import time
 import types
 import uuid
 from enum import Enum
-from typing import Any, Callable, List, Optional, Tuple
+from typing import Any, Callable, List, Optional, Tuple, Union
 
 import httpx  # type: ignore
 import requests  # type: ignore
@@ -15,7 +15,14 @@ import requests  # type: ignore
 import litellm
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
-from litellm.types.llms.anthropic import AnthropicMessagesToolChoice
+from litellm.types.llms.anthropic import (
+    AnthropicMessagesTool,
+    AnthropicMessagesToolChoice,
+)
+from litellm.types.llms.openai import (
+    ChatCompletionToolParam,
+    ChatCompletionToolParamFunctionChunk,
+)
 from litellm.types.utils import ResponseFormatChunk
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
@@ -142,7 +149,27 @@ class VertexAIAnthropicConfig:
             if param == "top_p":
                 optional_params["top_p"] = value
             if param == "response_format" and "response_schema" in value:
-                optional_params["response_format"] = ResponseFormatChunk(**value)  # type: ignore
+                """
+                When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode
+                - You usually want to provide a single tool
+                - You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
+                - Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model's perspective.
+                """
+                _tool_choice = None
+                _tool_choice = {"name": "json_tool_call", "type": "tool"}
+
+                _tool = AnthropicMessagesTool(
+                    name="json_tool_call",
+                    input_schema={
+                        "type": "object",
+                        "properties": {"values": value["response_schema"]},  # type: ignore
+                    },
+                )
+
+                optional_params["tools"] = [_tool]
+                optional_params["tool_choice"] = _tool_choice
+                optional_params["json_mode"] = True
 
         return optional_params
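The net effect of the new mapping, written out as plain dicts (the response_schema value is a placeholder; AnthropicMessagesTool is a typed dict in litellm.types.llms.anthropic, so plain dicts share its shape):

    response_format = {
        "type": "json_object",
        "response_schema": {"type": "object", "properties": {"name": {"type": "string"}}},
    }

    # what map_openai_params produces for the anthropic request
    optional_params = {
        "tools": [
            {
                "name": "json_tool_call",
                "input_schema": {
                    "type": "object",
                    # the user's schema is nested under a "values" property,
                    # which _process_response later unwraps into message.content
                    "properties": {"values": response_format["response_schema"]},
                },
            }
        ],
        "tool_choice": {"name": "json_tool_call", "type": "tool"},  # force the tool
        "json_mode": True,  # popped again in anthropic.py's completion()
    }
    print(optional_params)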
@@ -222,6 +249,7 @@ def completion(
     optional_params: dict,
     custom_prompt_dict: dict,
     headers: Optional[dict],
+    timeout: Union[float, httpx.Timeout],
     vertex_project=None,
     vertex_location=None,
     vertex_credentials=None,
@@ -301,6 +329,8 @@
             litellm_params=litellm_params,
             logger_fn=logger_fn,
             headers=vertex_headers,
+            client=client,
+            timeout=timeout,
         )
 
     except Exception as e:


@@ -1528,6 +1528,8 @@ def completion(
                 api_key=api_key,
                 logging_obj=logging,
                 headers=headers,
+                timeout=timeout,
+                client=client,
             )
             if optional_params.get("stream", False) or acompletion == True:
                 ## LOGGING
@@ -2046,7 +2048,10 @@
                     acompletion=acompletion,
                     headers=headers,
                     custom_prompt_dict=custom_prompt_dict,
+                    timeout=timeout,
+                    client=client,
                 )
             else:
                 model_response = vertex_ai.completion(
                     model=model,


@@ -1,5 +1,13 @@
 model_list:
-  - model_name: llama-3
+  - model_name: bad-azure-model
     litellm_params:
-      model: gpt-4
-      request_timeout: 1
+      model: azure/chatgpt-v-2
+      azure_ad_token: ""
+      api_base: os.environ/AZURE_API_BASE
+  - model_name: good-openai-model
+    litellm_params:
+      model: gpt-3.5-turbo
+
+litellm_settings:
+  fallbacks: [{"bad-azure-model": ["good-openai-model"]}]
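This config change looks like a local proxy fallback setup rather than part of the response_schema feature itself. For reference, a sketch of how it would be exercised, assuming the proxy is running on litellm's default port 4000 with a placeholder key:

    import openai

    # point the OpenAI SDK at the local litellm proxy
    client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

    # "bad-azure-model" has an empty azure_ad_token, so the call should fail and
    # the proxy retries against "good-openai-model" per litellm_settings.fallbacks
    resp = client.chat.completions.create(
        model="bad-azure-model",
        messages=[{"role": "user", "content": "ping"}],
    )
    print(resp.choices[0].message.content)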


@@ -1128,6 +1128,39 @@ def vertex_httpx_mock_post_valid_response(*args, **kwargs):
     return mock_response
 
 
+def vertex_httpx_mock_post_valid_response_anthropic(*args, **kwargs):
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.headers = {"Content-Type": "application/json"}
+    mock_response.json.return_value = {
+        "id": "msg_vrtx_013Wki5RFQXAspL7rmxRFjZg",
+        "type": "message",
+        "role": "assistant",
+        "model": "claude-3-5-sonnet-20240620",
+        "content": [
+            {
+                "type": "tool_use",
+                "id": "toolu_vrtx_01YMnYZrToPPfcmY2myP2gEB",
+                "name": "json_tool_call",
+                "input": {
+                    "values": [
+                        {"recipe_name": "Chocolate Chip Cookies"},
+                        {"recipe_name": "Oatmeal Raisin Cookies"},
+                        {"recipe_name": "Peanut Butter Cookies"},
+                        {"recipe_name": "Snickerdoodle Cookies"},
+                        {"recipe_name": "Sugar Cookies"},
+                    ]
+                },
+            }
+        ],
+        "stop_reason": "tool_use",
+        "stop_sequence": None,
+        "usage": {"input_tokens": 368, "output_tokens": 118},
+    }
+    return mock_response
+
+
 def vertex_httpx_mock_post_invalid_schema_response(*args, **kwargs):
     mock_response = MagicMock()
     mock_response.status_code = 200
@@ -1183,11 +1216,29 @@ def vertex_httpx_mock_post_invalid_schema_response(*args, **kwargs):
     return mock_response
 
 
+def vertex_httpx_mock_post_invalid_schema_response_anthropic(*args, **kwargs):
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.headers = {"Content-Type": "application/json"}
+    mock_response.json.return_value = {
+        "id": "msg_vrtx_013Wki5RFQXAspL7rmxRFjZg",
+        "type": "message",
+        "role": "assistant",
+        "model": "claude-3-5-sonnet-20240620",
+        "content": [{"text": "Hi! My name is Claude.", "type": "text"}],
+        "stop_reason": "end_turn",
+        "stop_sequence": None,
+        "usage": {"input_tokens": 368, "output_tokens": 118},
+    }
+    return mock_response
+
+
 @pytest.mark.parametrize(
     "model, vertex_location, supports_response_schema",
     [
         ("vertex_ai_beta/gemini-1.5-pro-001", "us-central1", True),
         ("vertex_ai_beta/gemini-1.5-flash", "us-central1", False),
+        ("vertex_ai/claude-3-5-sonnet@20240620", "us-east5", False),
     ],
 )
 @pytest.mark.parametrize(
@@ -1231,12 +1282,21 @@ async def test_gemini_pro_json_schema_args_sent_httpx(
     httpx_response = MagicMock()
     if invalid_response is True:
-        httpx_response.side_effect = vertex_httpx_mock_post_invalid_schema_response
+        if "claude" in model:
+            httpx_response.side_effect = (
+                vertex_httpx_mock_post_invalid_schema_response_anthropic
+            )
+        else:
+            httpx_response.side_effect = vertex_httpx_mock_post_invalid_schema_response
     else:
-        httpx_response.side_effect = vertex_httpx_mock_post_valid_response
+        if "claude" in model:
+            httpx_response.side_effect = vertex_httpx_mock_post_valid_response_anthropic
+        else:
+            httpx_response.side_effect = vertex_httpx_mock_post_valid_response
 
     with patch.object(client, "post", new=httpx_response) as mock_call:
+        print("SENDING CLIENT POST={}".format(client.post))
         try:
-            _ = completion(
+            resp = completion(
                 model=model,
                 messages=messages,
                 response_format={
@@ -1247,30 +1307,34 @@
                 vertex_location=vertex_location,
                 client=client,
             )
+            print("Received={}".format(resp))
             if invalid_response is True and enforce_validation is True:
                 pytest.fail("Expected this to fail")
         except litellm.JSONSchemaValidationError as e:
-            if invalid_response is False and "claude-3" not in model:
+            if invalid_response is False:
                 pytest.fail("Expected this to pass. Got={}".format(e))
 
         mock_call.assert_called_once()
-        print(mock_call.call_args.kwargs)
-        print(mock_call.call_args.kwargs["json"]["generationConfig"])
-        if supports_response_schema:
-            assert (
-                "response_schema"
-                in mock_call.call_args.kwargs["json"]["generationConfig"]
-            )
-        else:
-            assert (
-                "response_schema"
-                not in mock_call.call_args.kwargs["json"]["generationConfig"]
-            )
-            assert (
-                "Use this JSON schema:"
-                in mock_call.call_args.kwargs["json"]["contents"][0]["parts"][1]["text"]
-            )
+        if "claude" not in model:
+            print(mock_call.call_args.kwargs)
+            print(mock_call.call_args.kwargs["json"]["generationConfig"])
+
+            if supports_response_schema:
+                assert (
+                    "response_schema"
+                    in mock_call.call_args.kwargs["json"]["generationConfig"]
+                )
+            else:
+                assert (
+                    "response_schema"
+                    not in mock_call.call_args.kwargs["json"]["generationConfig"]
+                )
+                assert (
+                    "Use this JSON schema:"
+                    in mock_call.call_args.kwargs["json"]["contents"][0]["parts"][1][
+                        "text"
+                    ]
+                )
 
 
 @pytest.mark.parametrize("provider", ["vertex_ai_beta"])  # "vertex_ai",