litellm/tests/llm_translation/test_anthropic_completion.py
Krish Dholakia 6b9be5092f
LiteLLM Minor Fixes & Improvements (10/28/2024) (#6475)
* fix(anthropic/chat/transformation.py): support anthropic disable_parallel_tool_use param

Fixes https://github.com/BerriAI/litellm/issues/6456

* feat(anthropic/chat/transformation.py): support anthropic computer tool use

Closes https://github.com/BerriAI/litellm/issues/6427

* fix(vertex_ai/common_utils.py): parse out '$schema' when calling vertex ai

Fixes issue when trying to call vertex from vercel sdk

* fix(main.py): add 'extra_headers' support for azure on all translation endpoints

Fixes https://github.com/BerriAI/litellm/issues/6465

* fix: fix linting errors

* fix(transformation.py): handle no beta headers for anthropic

* test: cleanup test

* fix: fix linting error

* fix: fix linting errors

* fix: fix linting errors

* fix(transformation.py): handle dummy tool call

* fix(main.py): fix linting error

* fix(azure.py): pass required param

* LiteLLM Minor Fixes & Improvements (10/24/2024) (#6441)

* fix(azure.py): handle /openai/deployment in azure api base

* fix(factory.py): fix faulty anthropic tool result translation check

Fixes https://github.com/BerriAI/litellm/issues/6422

* fix(gpt_transformation.py): add support for parallel_tool_calls to azure

Fixes https://github.com/BerriAI/litellm/issues/6440

* fix(factory.py): support anthropic prompt caching for tool results

* fix(vertex_ai/common_utils): don't pop non-null required field

Fixes https://github.com/BerriAI/litellm/issues/6426

* feat(vertex_ai.py): support code_execution tool call for vertex ai + gemini

Closes https://github.com/BerriAI/litellm/issues/6434

* build(model_prices_and_context_window.json): Add 'supports_assistant_prefill' for bedrock claude-3-5-sonnet v2 models

Closes https://github.com/BerriAI/litellm/issues/6437

* fix(types/utils.py): fix linting

* test: update test to include required fields

* test: fix test

* test: handle flaky test

* test: remove e2e test - hitting gemini rate limits

* Litellm dev 10 26 2024 (#6472)

* docs(exception_mapping.md): add missing exception types

Fixes https://github.com/Aider-AI/aider/issues/2120#issuecomment-2438971183

* fix(main.py): register custom model pricing with specific key

Ensure custom model pricing is registered to the specific model+provider key combination

* test: make testing more robust for custom pricing

* fix(redis_cache.py): instrument otel logging for sync redis calls

ensures complete coverage for all redis cache calls

* (Testing) Add unit testing for DualCache - ensure in memory cache is used when expected  (#6471)

* test test_dual_cache_get_set

* unit testing for dual cache

* fix async_set_cache_sadd

* test_dual_cache_local_only

* redis otel tracing + async support for latency routing (#6452)

* docs(exception_mapping.md): add missing exception types

Fixes https://github.com/Aider-AI/aider/issues/2120#issuecomment-2438971183

* fix(main.py): register custom model pricing with specific key

Ensure custom model pricing is registered to the specific model+provider key combination

* test: make testing more robust for custom pricing

* fix(redis_cache.py): instrument otel logging for sync redis calls

ensures complete coverage for all redis cache calls

* refactor: pass parent_otel_span for redis caching calls in router

allows for more observability into what calls are causing latency issues

* test: update tests with new params

* refactor: ensure e2e otel tracing for router

* refactor(router.py): add more otel tracing across router

catch all latency issues for router requests

* fix: fix linting error

* fix(router.py): fix linting error

* fix: fix test

* test: fix tests

* fix(dual_cache.py): pass ttl to redis cache

* fix: fix param

* fix(dual_cache.py): set default value for parent_otel_span

* fix(transformation.py): support 'response_format' for anthropic calls

* fix(transformation.py): check for cache_control inside 'function' block

* fix: fix linting error

* fix: fix linting errors

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
2024-10-29 17:20:24 -07:00

624 lines
20 KiB
Python

# What is this?
## Unit tests for Anthropic Adapter
import asyncio
import os
import sys
import traceback
from dotenv import load_dotenv
import litellm.types
import litellm.types.utils
from litellm.llms.anthropic.chat import ModelResponseIterator
load_dotenv()
import io
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Optional
from unittest.mock import MagicMock, patch
import pytest
import litellm
from litellm import (
AnthropicConfig,
Router,
adapter_completion,
AnthropicExperimentalPassThroughConfig,
)
from litellm.adapters.anthropic_adapter import anthropic_adapter
from litellm.types.llms.anthropic import AnthropicResponse
from litellm.llms.anthropic.common_utils import process_anthropic_headers
from httpx import Headers
def test_anthropic_completion_messages_translation():
    """
    A single plain-text user message must come out of the Anthropic -> OpenAI
    message translation unchanged.
    """
    anthropic_messages = [{"role": "user", "content": "Hey, how's it going?"}]
    config = AnthropicExperimentalPassThroughConfig()

    openai_messages = config.translate_anthropic_messages_to_openai(
        messages=anthropic_messages  # type: ignore
    )

    assert openai_messages == [{"role": "user", "content": "Hey, how's it going?"}]
def test_anthropic_completion_input_translation():
    """
    The adapter's input translation must return a result that keeps the model
    name and the plain-text user message as given.
    """
    user_message = {"role": "user", "content": "Hey, how's it going?"}
    request_kwargs = {"model": "gpt-3.5-turbo", "messages": [user_message]}

    translated_input = anthropic_adapter.translate_completion_input_params(
        kwargs=request_kwargs
    )

    assert translated_input is not None
    assert translated_input["model"] == "gpt-3.5-turbo"
    assert translated_input["messages"] == [
        {"role": "user", "content": "Hey, how's it going?"}
    ]
def test_anthropic_completion_input_translation_with_metadata():
    """
    Tests that cost tracking works as expected with LiteLLM Proxy.

    LiteLLM Proxy will insert `litellm_metadata` for anthropic endpoints to track
    user_api_key and user_api_key_team_id.

    This test ensures that `litellm_metadata` is not present in the translated input,
    and that its contents are carried over under `metadata` instead, so that
    `litellm.acompletion()` will receive metadata, which is a litellm specific param.
    """
    proxy_metadata = {
        "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
        "user_api_key_alias": None,
        "user_api_end_user_max_budget": None,
        "litellm_api_version": "1.40.19",
        "global_max_parallel_requests": None,
        "user_api_key_user_id": "default_user_id",
        "user_api_key_org_id": None,
        "user_api_key_team_id": None,
        "user_api_key_team_alias": None,
        "user_api_key_team_max_budget": None,
        "user_api_key_team_spend": None,
        "user_api_key_spend": 0.0,
        "user_api_key_max_budget": None,
        "user_api_key_metadata": {},
    }
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
        "litellm_metadata": proxy_metadata,
    }

    translated_input = anthropic_adapter.translate_completion_input_params(kwargs=data)

    # The proxy-only key must be renamed, not forwarded as-is.
    assert "litellm_metadata" not in translated_input
    assert "metadata" in translated_input
    assert translated_input["metadata"] == data["litellm_metadata"]
def streaming_format_tests(chunk: dict, idx: int):
    """
    Assert the event type of the leading chunks of a stream.

    - idx 0 -> chunk.get("type") must be "message_start"
    - idx 1 -> chunk.get("type") must be "content_block_start"
    - idx 2 -> chunk.get("type") must be "content_block_delta"

    Chunks at any other position are not checked.
    """
    expected_type_by_position = {
        0: "message_start",
        1: "content_block_start",
        2: "content_block_delta",
    }
    expected_type = expected_type_by_position.get(idx)
    if expected_type is not None:
        assert chunk.get("type") == expected_type
@pytest.mark.parametrize("stream", [True])  # False
def test_anthropic_completion_e2e(stream):
    """
    Run a mocked completion through the registered anthropic adapter.

    Non-streaming: the result must be an `AnthropicResponse`.

    Streaming:
    - ensure finish reason is returned
    - assert content block is started and stopped
    - ensure last chunk is 'message_stop'
    """
    litellm.set_verbose = True
    litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

    response = adapter_completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        adapter_id="anthropic",
        mock_response="This is a fake call",
        stream=stream,
    )
    print(f"Response: {response}")
    assert response is not None

    if stream is False:
        assert isinstance(response, AnthropicResponse)
        return

    assert isinstance(response, litellm.types.utils.AdapterCompletionStreamWrapper)

    finish_reason: Optional[str] = None
    seen_event_types = set()
    for position, event in enumerate(response):
        print(event)
        streaming_format_tests(chunk=event, idx=position)

        stop_reason = event.get("delta", {}).get("stop_reason")
        if stop_reason is not None:
            finish_reason = stop_reason
        seen_event_types.add(event.get("type"))

    assert (
        "content_block_start" in seen_event_types
        and "content_block_stop" in seen_event_types
    )
    assert finish_reason is not None
    assert ("message_stop" in seen_event_types) is True
# Anthropic-format streaming events used as a fixture: one text block (index 0)
# followed by two `get_current_weather` tool_use blocks (indexes 1 and 2), then
# the closing message_delta / message_stop events.
def _content_delta(block_index, delta_type, payload_key, fragment):
    """Build one `content_block_delta` event carrying a single fragment."""
    return {
        "type": "content_block_delta",
        "index": block_index,
        "delta": {"type": delta_type, payload_key: fragment},
    }


def _weather_tool_block(block_index, tool_id, json_fragments):
    """Build the start / delta / stop events of one `get_current_weather` tool_use block."""
    return [
        {
            "type": "content_block_start",
            "index": block_index,
            "content_block": {
                "type": "tool_use",
                "id": tool_id,
                "name": "get_current_weather",
                "input": {},
            },
        },
        *(
            _content_delta(block_index, "input_json_delta", "partial_json", fragment)
            for fragment in json_fragments
        ),
        {"type": "content_block_stop", "index": block_index},
    ]


anthropic_chunk_list = [
    {
        "type": "content_block_start",
        "index": 0,
        "content_block": {"type": "text", "text": ""},
    },
    *(
        _content_delta(0, "text_delta", "text", fragment)
        for fragment in (
            "To",
            " answer",
            " your question about the weather",
            " in Boston and Los",
            " Angeles today, I'll",
            " need to",
            " use",
            " the",
            " get_current_weather",
            " function",
            " for",
            " both",
            " cities",
            ". Let",
            " me fetch",
            " that",
            " information",
            " for",
            " you.",
        )
    ),
    {"type": "content_block_stop", "index": 0},
    *_weather_tool_block(
        1,
        "toolu_12345",
        ("", '{"locat', 'ion": "Bos', 'ton, MA"}'),
    ),
    *_weather_tool_block(
        2,
        "toolu_023423423",
        ("", '{"l', "oca", "tio", 'n": "Lo', "s Angel", 'es, CA"}'),
    ),
    {
        "type": "message_delta",
        "delta": {"stop_reason": "tool_use", "stop_sequence": None},
        "usage": {"output_tokens": 137},
    },
    {"type": "message_stop"},
]
def test_anthropic_tool_streaming():
    """
    OpenAI starts tool_use indexes at 0 for the first tool, regardless of preceding text.

    Anthropic gives tool_use indexes starting at the first chunk, meaning they often start at 1
    when they should start at 0.

    Every chunk of `anthropic_chunk_list` is fed through `ModelResponseIterator.chunk_parser`
    and each parsed `tool_use` must carry the 0-based tool index. The test also
    fails if fewer (or more) tool blocks are surfaced than the fixture opens, so
    it cannot pass without the per-chunk assertion ever running.
    """
    litellm.set_verbose = True
    response_iter = ModelResponseIterator([], False)

    # Number of tool_use blocks the fixture opens; checked after the loop so a
    # parser that never returns `tool_use` does not make this test pass vacuously.
    expected_tool_blocks = sum(
        1
        for chunk in anthropic_chunk_list
        if chunk.get("type") == "content_block_start"
        and chunk.get("content_block", {}).get("type") == "tool_use"
    )

    # First index is 0, we'll start earlier because incrementing is easier
    correct_tool_index = -1
    for chunk in anthropic_chunk_list:
        parsed_chunk = response_iter.chunk_parser(chunk)
        if tool_use := parsed_chunk.get("tool_use"):
            # We only increment when a new block starts
            if tool_use.get("id") is not None:
                correct_tool_index += 1
            assert tool_use["index"] == correct_tool_index

    tool_blocks_seen = correct_tool_index + 1
    assert tool_blocks_seen == expected_tool_blocks, (
        f"chunk_parser surfaced {tool_blocks_seen} tool_use block(s), "
        f"fixture contains {expected_tool_blocks}"
    )
def test_anthropic_tool_calling_translation():
    """
    Translate an Anthropic-format conversation containing a `tool_use` block and
    a `tool_result` block, and check that messages are produced with the user
    message first.

    NOTE(review): the `tool_result` block's `tool_use_id` does not match the `id`
    of the preceding `tool_use` block; unclear from this file whether that is intentional.
    """
    from litellm.adapters.anthropic_adapter import anthropic_adapter

    research_tasks = (
        "Research ASC 350-40 to understand its scope and applicability to software development.",
        "Research ASC 985 to understand its scope and applicability to software development.",
        "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
    )

    def planned_steps():
        # Return fresh objects on every call so the two messages share no mutable state.
        return [
            {"tool_name": "AccountingResearchTool", "description": description}
            for description in research_tasks
        ]

    user_question = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Would development of a software platform be under ASC 350-40 or ASC 985?",
            }
        ],
    }
    assistant_tool_call = {
        "role": "assistant",
        "content": [
            {
                "type": "tool_use",
                "id": "37d6f703-cbcc-497d-95a1-2aa24a114adc",
                "name": "TaskPlanningTool",
                "input": {
                    "completed_steps": [],
                    "next_steps": planned_steps(),
                    "learnings": [],
                    "potential_issues": [
                        "The distinction between the two standards might not be clear-cut for all types of software development.",
                        "There might be specific circumstances or details about the software platform that could affect which standard applies.",
                    ],
                    "missing_info": [
                        "Specific details about the type of software platform being developed (e.g., for internal use or for sale).",
                        "Whether the entity developing the software is also the end-user or if it's being developed for external customers.",
                    ],
                    "done": False,
                    "required_formatting": None,
                },
            }
        ],
    }
    user_tool_result = {
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": "eb7023b1-5ee8-43b8-b90f-ac5a23d37c31",
                "content": {
                    "completed_steps": [],
                    "next_steps": planned_steps(),
                    "formatting_step": None,
                },
            }
        ],
    }
    kwargs = {
        "model": "claude-3-5-sonnet-20240620",
        "messages": [user_question, assistant_tool_call, user_tool_result],
    }

    translated_params = anthropic_adapter.translate_completion_input_params(
        kwargs=kwargs
    )
    translated_messages = translated_params["messages"]
    print(translated_messages)

    assert len(translated_params["messages"]) > 0
    assert translated_params["messages"][0]["role"] == "user"
def test_process_anthropic_headers_empty():
    """
    An empty header mapping must be processed into an empty dictionary.
    """
    processed = process_anthropic_headers({})
    assert processed == {}, "Expected empty dictionary for no input"
def test_process_anthropic_headers_with_all_headers():
    """
    With all four Anthropic rate-limit headers present, each one must be exposed
    under its `x-ratelimit-*` name, and every incoming header (including the
    unrelated one) must also be returned with an `llm_provider-` prefix.
    """
    raw_headers = {
        "anthropic-ratelimit-requests-limit": "100",
        "anthropic-ratelimit-requests-remaining": "90",
        "anthropic-ratelimit-tokens-limit": "10000",
        "anthropic-ratelimit-tokens-remaining": "9000",
        "other-header": "value",
    }

    # Renamed rate-limit entries first ...
    expected_output = {
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-requests": "90",
        "x-ratelimit-limit-tokens": "10000",
        "x-ratelimit-remaining-tokens": "9000",
    }
    # ... then one prefixed passthrough entry per incoming header.
    for name, value in raw_headers.items():
        expected_output[f"llm_provider-{name}"] = value

    result = process_anthropic_headers(Headers(raw_headers))

    assert result == expected_output, "Unexpected output for all Anthropic headers"
def test_process_anthropic_headers_with_partial_headers():
    """
    With only some Anthropic rate-limit headers present, only those must be
    exposed under `x-ratelimit-*` names; every incoming header must still be
    returned with an `llm_provider-` prefix.
    """
    raw_headers = {
        "anthropic-ratelimit-requests-limit": "100",
        "anthropic-ratelimit-tokens-remaining": "9000",
        "other-header": "value",
    }

    expected_output = {
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-tokens": "9000",
    }
    for name, value in raw_headers.items():
        expected_output[f"llm_provider-{name}"] = value

    result = process_anthropic_headers(Headers(raw_headers))

    assert result == expected_output, "Unexpected output for partial Anthropic headers"
def test_process_anthropic_headers_with_no_matching_headers():
    """
    Headers that are not Anthropic rate-limit headers must only be returned
    with an `llm_provider-` prefix; no `x-ratelimit-*` entries are expected.
    """
    raw_headers = {"unrelated-header-1": "value1", "unrelated-header-2": "value2"}
    expected_output = {
        f"llm_provider-{name}": value for name, value in raw_headers.items()
    }

    result = process_anthropic_headers(Headers(raw_headers))

    assert result == expected_output, "Unexpected output for non-matching headers"
def test_anthropic_computer_tool_use():
    """
    Call `litellm.completion` with an Anthropic `computer_20241022` tool and
    print the response. Nothing is asserted on the result; the test only fails
    if the call raises.

    NOTE(review): no `mock_response` is supplied, so this presumably performs a
    live Anthropic request and needs credentials - confirm.
    """
    from litellm import completion

    computer_tool = {
        "type": "computer_20241022",
        "function": {
            "name": "computer",
            "parameters": {
                "display_height_px": 100,
                "display_width_px": 100,
                "display_number": 1,
            },
        },
    }
    prompt = "Save a picture of a cat to my desktop."

    # An explicit headers={"anthropic-beta": "computer-use-2024-10-22"} override
    # is not passed to this call.
    resp = completion(
        model="claude-3-5-sonnet-20241022",
        messages=[{"role": "user", "content": prompt}],
        tools=[computer_tool],
    )

    print(resp)
@pytest.mark.parametrize(
    "computer_tool_used, prompt_caching_set, expected_beta_header",
    [
        (True, False, True),
        (False, True, True),
        (True, True, True),
        (False, False, False),
    ],
)
def test_anthropic_beta_header(
    computer_tool_used, prompt_caching_set, expected_beta_header
):
    """
    The `anthropic-beta` header must be present in the generated headers when
    the computer tool is used or prompt caching is set, and absent when neither is.
    """
    config = litellm.AnthropicConfig()
    headers = config.get_anthropic_headers(
        api_key="fake-api-key",
        computer_tool_used=computer_tool_used,
        prompt_caching_set=prompt_caching_set,
    )

    beta_header_present = "anthropic-beta" in headers
    assert beta_header_present == bool(expected_beta_header)
@pytest.mark.parametrize(
    "cache_control_location",
    [
        "inside_function",
        "outside_function",
    ],
)
def test_anthropic_tool_helper(cache_control_location):
    """
    `cache_control` must end up at the top level of the mapped tool whether it
    was supplied inside the tool's `function` block or next to it.
    """
    from litellm.llms.anthropic.chat.transformation import AnthropicConfig

    weather_function = {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                },
            },
            "required": ["location"],
        },
    }
    openai_tool = {"type": "function", "function": weather_function}

    # Attach the marker either to the nested function block or to the tool itself.
    marker_target = (
        openai_tool["function"]
        if cache_control_location == "inside_function"
        else openai_tool
    )
    marker_target["cache_control"] = {"type": "ephemeral"}

    mapped_tool = AnthropicConfig()._map_tool_helper(tool=openai_tool)

    assert mapped_tool["cache_control"] == {"type": "ephemeral"}