test(test_completion.py): add testing for anthropic vision calling

Krrish Dholakia 2024-03-04 13:34:49 -08:00
parent 33afa53353
commit edda2d9293
4 changed files with 81 additions and 5 deletions


@@ -499,7 +499,11 @@ def convert_to_anthropic_image_obj(openai_image_url: str):
     # Infer image format from the URL
     image_format = openai_image_url.split("data:image/")[1].split(";base64,")[0]
-    return {"type": "base64", "media_type": image_format, "data": base64_data}
+    return {
+        "type": "base64",
+        "media_type": f"image/{image_format}",
+        "data": base64_data,
+    }

 def anthropic_messages_pt(messages: list):
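
For reference, a minimal standalone sketch of what the patched conversion now produces for a standard data: URL. The function and variable names below are illustrative (the real helper is convert_to_anthropic_image_obj, and base64_data is extracted above the hunk shown); the key change is that media_type now carries a full MIME type such as "image/jpeg" instead of the bare "jpeg" the old return statement passed through.

# Standalone sketch of the patched conversion; illustrative only.
def openai_data_url_to_anthropic_image(openai_image_url: str) -> dict:
    # "data:image/jpeg;base64,<payload>" -> format "jpeg", payload "<payload>"
    image_format = openai_image_url.split("data:image/")[1].split(";base64,")[0]
    base64_data = openai_image_url.split(";base64,")[1]
    return {
        "type": "base64",
        "media_type": f"image/{image_format}",  # previously just `image_format`
        "data": base64_data,
    }


print(openai_data_url_to_anthropic_image("data:image/jpeg;base64,Zm9vYmFy"))
# {'type': 'base64', 'media_type': 'image/jpeg', 'data': 'Zm9vYmFy'}
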
@@ -515,10 +519,35 @@ def anthropic_messages_pt(messages: list):
     last_assistant_message_idx: Optional[int] = None
     # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility
     new_messages = []
+    if len(messages) == 1:
+        # check if the message is a user message
+        if messages[0]["role"] == "assistant":
+            new_messages.append({"role": "user", "content": ""})
+
+        # check if content is a list (vision)
+        if isinstance(messages[0]["content"], list):  # vision input
+            new_content = []
+            for m in messages[0]["content"]:
+                if m.get("type", "") == "image_url":
+                    new_content.append(
+                        {
+                            "type": "image",
+                            "source": convert_to_anthropic_image_obj(
+                                m["image_url"]["url"]
+                            ),
+                        }
+                    )
+                elif m.get("type", "") == "text":
+                    new_content.append({"type": "text", "text": m["text"]})
+            new_messages.append({"role": messages[0]["role"], "content": new_content})  # type: ignore
+        else:
+            new_messages.append(messages[0])
+
+        return new_messages

     for i in range(len(messages) - 1):  # type: ignore
         if i == 0 and messages[i]["role"] == "assistant":
             new_messages.append({"role": "user", "content": ""})
         if isinstance(messages[i]["content"], list):  # vision input
             new_content = []
             for m in messages[i]["content"]:
@@ -546,8 +575,6 @@ def anthropic_messages_pt(messages: list):
         if messages[i]["role"] == "assistant":
             last_assistant_message_idx = i

-    new_messages.append(messages[-1])

     if last_assistant_message_idx is not None:
         new_messages[last_assistant_message_idx]["content"] = new_messages[
             last_assistant_message_idx
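
To illustrate the new single-message branch: an OpenAI-style vision message should now be reshaped into Anthropic's content-block format in one pass. The values below are made-up example data, assuming the image conversion shown in the first hunk.

# Example input (OpenAI-style) and the output the new branch should produce.
openai_style = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Whats in this image?"},
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,Zm9v"}},
        ],
    }
]

# Based on the hunk above, anthropic_messages_pt(openai_style) should now return:
expected = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Whats in this image?"},
            {
                "type": "image",
                "source": {"type": "base64", "media_type": "image/jpeg", "data": "Zm9v"},
            },
        ],
    }
]
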


@@ -351,7 +351,7 @@ def test_gemini_pro_vision_base64():
         load_vertex_ai_credentials()
         litellm.set_verbose = True
         litellm.num_retries = 3
-        image_path = "cached_logo.jpg"
+        image_path = "../proxy/cached_logo.jpg"
         # Getting the base64 string
         base64_image = encode_image(image_path)
         resp = litellm.completion(


@@ -159,6 +159,51 @@ def test_completion_claude_3_stream():
         pytest.fail(f"Error occurred: {e}")


+def encode_image(image_path):
+    import base64
+
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
+
+
+@pytest.mark.skip(
+    reason="we already test claude-3, this is just another way to pass images"
+)
+def test_completion_claude_3_base64():
+    try:
+        litellm.set_verbose = True
+        litellm.num_retries = 3
+        image_path = "../proxy/cached_logo.jpg"
+        # Getting the base64 string
+        base64_image = encode_image(image_path)
+        resp = litellm.completion(
+            model="anthropic/claude-3-opus-20240229",
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Whats in this image?"},
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": "data:image/jpeg;base64," + base64_image
+                            },
+                        },
+                    ],
+                }
+            ],
+        )
+        print(f"\nResponse: {resp}")
+
+        prompt_tokens = resp.usage.prompt_tokens
+        raise Exception("it worked!")
+    except Exception as e:
+        if "500 Internal error encountered.'" in str(e):
+            pass
+        else:
+            pytest.fail(f"An exception occurred - {str(e)}")
+
+
 def test_completion_mistral_api():
     try:
         litellm.set_verbose = True
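
The new test is skipped by default. To exercise it locally, one option is to remove the @pytest.mark.skip decorator and run it in isolation via pytest's programmatic entry point; the sketch below assumes the repository's usual tests/ layout for the file path, plus an ANTHROPIC_API_KEY and the cached_logo.jpg fixture.

# Run only the base64 vision test after removing the skip marker.
import pytest

# The path is an assumption and may need adjusting to your checkout.
pytest.main(["-s", "-k", "test_completion_claude_3_base64", "litellm/tests/test_completion.py"])
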


@@ -200,6 +200,10 @@ def map_finish_reason(
         return "content_filter"
     elif finish_reason == "STOP":  # vertex ai
         return "stop"
+    elif finish_reason == "end_turn" or finish_reason == "stop_sequence":  # anthropic
+        return "stop"
+    elif finish_reason == "max_tokens":  # anthropic
+        return "length"
     return finish_reason
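
A quick sketch of the normalization these branches add: Anthropic stop reasons are folded into the OpenAI-style finish_reason vocabulary. The function below is a standalone re-implementation of only the new branches, for illustration; the name is hypothetical (the real function is map_finish_reason).

# Illustrative re-implementation of the new Anthropic branches only.
def normalize_anthropic_finish_reason(finish_reason: str) -> str:
    if finish_reason in ("end_turn", "stop_sequence"):  # anthropic natural stops
        return "stop"
    if finish_reason == "max_tokens":  # anthropic hit the token limit
        return "length"
    return finish_reason


assert normalize_anthropic_finish_reason("end_turn") == "stop"
assert normalize_anthropic_finish_reason("stop_sequence") == "stop"
assert normalize_anthropic_finish_reason("max_tokens") == "length"
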