litellm-mirror/tests/pass_through_tests/test_anthropic_passthrough.py
Ishaan Jaff f47987e673
(Refactor) /v1/messages to follow simpler logic for Anthropic API spec (#9013)
* anthropic_messages_handler v0

* fix /messages

* working messages with router methods

* test_anthropic_messages_handler_litellm_router_non_streaming

* test_anthropic_messages_litellm_router_non_streaming_with_logging

* AnthropicMessagesConfig

* _handle_anthropic_messages_response_logging

* working with /v1/messages endpoint

* working /v1/messages endpoint

* refactor to use router factory function

* use aanthropic_messages

* use BaseConfig for Anthropic /v1/messages

* track api key, team on /v1/messages endpoint

* fix get_logging_payload

* BaseAnthropicMessagesTest

* align test config

* test_anthropic_messages_with_thinking

* test_anthropic_streaming_with_thinking

* fix - display anthropic url for debugging

* test_bad_request_error_handling

* test_anthropic_messages_router_streaming_with_bad_request

* fix ProxyException

* test_bad_request_error_handling_streaming

* use provider_specific_header

* test_anthropic_messages_with_extra_headers

* test_anthropic_messages_to_wildcard_model

* fix gcs pub sub test

* standard_logging_payload

* fix unit testing for anthropic /v1/messages support

* fix pass through anthropic messages api

* delete dead code

* fix anthropic pass through response

* revert change to spend tracking utils

* fix get_litellm_metadata_from_kwargs

* fix spend logs payload json

* proxy_pass_through_endpoint_tests

* TestAnthropicPassthroughBasic

* fix pass through tests

* test_async_vertex_proxy_route_api_key_auth

* _handle_anthropic_messages_response_logging

* vertex_credentials

* test_set_default_vertex_config

* test_anthropic_messages_litellm_router_non_streaming_with_logging

* test_ageneric_api_call_with_fallbacks_basic

* test__aadapter_completion
2025-03-06 00:43:08 -08:00

263 lines
11 KiB
Python

"""
These tests ensure that the proxy can pass through Anthropic requests and log spend for them
"""
import pytest
import anthropic
import aiohttp
import asyncio
import json
@pytest.mark.asyncio
async def test_anthropic_basic_completion_with_headers():
    """
    Send a non-streaming request through the Anthropic pass-through route and
    verify the spend log recorded for it.

    The test posts to ``/anthropic/v1/messages`` on a proxy expected to be
    listening on ``0.0.0.0:4000``, reads the token usage from the response
    body, waits for the spend log to be written, then fetches ``/spend/logs``
    for the call id taken from the ``x-litellm-call-id`` response header and
    checks request metadata, tokens, spend, timestamps and tags.
    """
    print("making basic completion request to anthropic passthrough with aiohttp")

    headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
        "Anthropic-Version": "2023-06-01",
    }
    payload = {
        "model": "claude-3-5-sonnet-20241022",
        "max_tokens": 10,
        "messages": [{"role": "user", "content": "Say 'hello test' and nothing else"}],
        "litellm_metadata": {
            "tags": ["test-tag-1", "test-tag-2"],
        },
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://0.0.0.0:4000/anthropic/v1/messages", json=payload, headers=headers
        ) as response:
            response_text = await response.text()
            print(f"Response text: {response_text}")
            # Fail early (with the body already printed) instead of failing
            # later on a missing "usage" key or a JSON decoding error.
            assert response.status == 200, "Response should be successful"

            response_json = await response.json()
            print(
                "non-streaming response",
                json.dumps(response_json, indent=4, default=str),
            )

            reported_usage = response_json.get("usage")
            assert reported_usage is not None, "Response should include usage"
            anthropic_api_input_tokens = reported_usage.get("input_tokens")
            anthropic_api_output_tokens = reported_usage.get("output_tokens")

            litellm_call_id = response.headers.get("x-litellm-call-id")
            print(f"LiteLLM Call ID: {litellm_call_id}")
            assert (
                litellm_call_id is not None
            ), "Response should include the x-litellm-call-id header"

        # Wait for spend to be logged
        await asyncio.sleep(15)

        # Check spend logs for this specific request
        async with session.get(
            f"http://0.0.0.0:4000/spend/logs?request_id={litellm_call_id}",
            headers={"Authorization": "Bearer sk-1234"},
        ) as spend_response:
            print("text spend response")
            print(f"Spend response: {spend_response}")
            spend_data = await spend_response.json()
            print(f"Spend data: {spend_data}")

    # Basic existence checks; the list must be non-empty before it is indexed.
    assert (
        isinstance(spend_data, list) and spend_data
    ), "Should have spend data for the request"
    log_entry = spend_data[0]  # Get the first (and should be only) log entry
    assert isinstance(log_entry, dict), "Log entry should be a dictionary"

    # Request metadata assertions
    assert log_entry["request_id"] == litellm_call_id, "Request ID should match"
    assert (
        log_entry["call_type"] == "pass_through_endpoint"
    ), "Call type should be pass_through_endpoint"
    assert (
        log_entry["api_base"] == "https://api.anthropic.com/v1/messages"
    ), "API base should be Anthropic's endpoint"

    # Token and spend assertions (type is checked before the comparison)
    assert isinstance(log_entry["spend"], (int, float)), "Spend should be a number"
    assert log_entry["spend"] > 0, "Spend should be greater than 0"
    assert log_entry["total_tokens"] > 0, "Should have some tokens"
    assert (
        log_entry["prompt_tokens"] == anthropic_api_input_tokens
    ), f"Should have prompt tokens matching anthropic api. Expected {anthropic_api_input_tokens} but got {log_entry['prompt_tokens']}"
    assert (
        log_entry["completion_tokens"] == anthropic_api_output_tokens
    ), f"Should have completion tokens matching anthropic api. Expected {anthropic_api_output_tokens} but got {log_entry['completion_tokens']}"
    assert (
        log_entry["total_tokens"]
        == log_entry["prompt_tokens"] + log_entry["completion_tokens"]
    ), "Total tokens should equal prompt + completion"

    # Time assertions
    assert all(
        key in log_entry for key in ["startTime", "endTime", "completionStartTime"]
    ), "Should have all time fields"
    assert (
        log_entry["startTime"] < log_entry["endTime"]
    ), "Start time should be before end time"

    # Metadata assertions
    assert str(log_entry["cache_hit"]).lower() != "true", "Cache should be off"
    assert log_entry["request_tags"] == [
        "test-tag-1",
        "test-tag-2",
    ], "Tags should match input"
    assert (
        "user_api_key" in log_entry["metadata"]
    ), "Should have user API key in metadata"
    assert "claude" in log_entry["model"]
    assert log_entry["custom_llm_provider"] == "anthropic"
@pytest.mark.asyncio
async def test_anthropic_streaming_with_headers():
    """
    Send a streaming request through the Anthropic pass-through route and
    verify the spend log recorded for it.

    The test posts to ``/anthropic/v1/messages`` with ``"stream": True`` on a
    proxy expected to be listening on ``0.0.0.0:4000``, collects the
    ``data: `` lines of the streamed body, derives token usage from the chunks
    that carry a ``usage`` object, waits for the spend log to be written, then
    fetches ``/spend/logs`` for the call id taken from the
    ``x-litellm-call-id`` response header and checks request metadata, tokens,
    spend, timestamps, tags and end user.
    """
    print("making streaming request to anthropic passthrough with aiohttp")

    headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
        "Anthropic-Version": "2023-06-01",
    }
    payload = {
        "model": "claude-3-5-sonnet-20241022",
        "max_tokens": 10,
        "messages": [
            {"role": "user", "content": "Say 'hello stream test' and nothing else"}
        ],
        "stream": True,
        "litellm_metadata": {
            "tags": ["test-tag-stream-1", "test-tag-stream-2"],
            "user": "test-user-1",
        },
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://0.0.0.0:4000/anthropic/v1/messages", json=payload, headers=headers
        ) as response:
            print("response status")
            print(response.status)
            assert response.status == 200, "Response should be successful"

            response_headers = response.headers
            print(f"Response headers: {response_headers}")
            litellm_call_id = response_headers.get("x-litellm-call-id")
            print(f"LiteLLM Call ID: {litellm_call_id}")
            assert (
                litellm_call_id is not None
            ), "Response should include the x-litellm-call-id header"

            # Keep only the payload of each "data: " line of the stream.
            collected_output = []
            async for line in response.content:
                if line:
                    text = line.decode("utf-8").strip()
                    if text.startswith("data: "):
                        collected_output.append(text[6:])  # Remove 'data: ' prefix
            print("Collected output:", "".join(collected_output))

        # Usage may sit at the top level of a chunk or under its "message" key.
        anthropic_api_usage_chunks = []
        for chunk in collected_output:
            chunk_json = json.loads(chunk)
            if "usage" in chunk_json:
                anthropic_api_usage_chunks.append(chunk_json["usage"])
            elif "message" in chunk_json and "usage" in chunk_json["message"]:
                anthropic_api_usage_chunks.append(chunk_json["message"]["usage"])
        print(
            "anthropic_api_usage_chunks",
            json.dumps(anthropic_api_usage_chunks, indent=4, default=str),
        )
        # Without this guard, max() below would raise a bare ValueError.
        assert (
            anthropic_api_usage_chunks
        ), "Stream should contain at least one usage chunk"

        # Input tokens are summed across chunks while output tokens take the
        # maximum. NOTE(review): presumably output_tokens is reported
        # cumulatively by the API - confirm against the streaming docs.
        anthropic_api_input_tokens = sum(
            usage.get("input_tokens", 0) for usage in anthropic_api_usage_chunks
        )
        anthropic_api_output_tokens = max(
            usage.get("output_tokens", 0) for usage in anthropic_api_usage_chunks
        )
        print("anthropic_api_input_tokens", anthropic_api_input_tokens)
        print("anthropic_api_output_tokens", anthropic_api_output_tokens)

        # Wait for spend to be logged
        await asyncio.sleep(20)

        # Check spend logs for this specific request
        async with session.get(
            f"http://0.0.0.0:4000/spend/logs?request_id={litellm_call_id}",
            headers={"Authorization": "Bearer sk-1234"},
        ) as spend_response:
            spend_data = await spend_response.json()
            print(f"Spend data: {spend_data}")

    # Basic existence checks; the list must be non-empty before it is indexed.
    assert (
        isinstance(spend_data, list) and spend_data
    ), "Should have spend data for the request"
    log_entry = spend_data[0]  # Get the first (and should be only) log entry
    assert isinstance(log_entry, dict), "Log entry should be a dictionary"

    # Request metadata assertions
    assert log_entry["request_id"] == litellm_call_id, "Request ID should match"
    assert (
        log_entry["call_type"] == "pass_through_endpoint"
    ), "Call type should be pass_through_endpoint"
    # NOTE(review): the api_base check below was already disabled for the
    # streaming case; confirm whether it should be re-enabled or removed.
    # assert (
    #     log_entry["api_base"] == "https://api.anthropic.com/v1/messages"
    # ), "API base should be Anthropic's endpoint"

    # Token and spend assertions (type is checked before the comparison)
    assert isinstance(log_entry["spend"], (int, float)), "Spend should be a number"
    assert log_entry["spend"] > 0, "Spend should be greater than 0"
    assert log_entry["total_tokens"] > 0, "Should have some tokens"
    assert (
        log_entry["prompt_tokens"] == anthropic_api_input_tokens
    ), f"Should have prompt tokens matching anthropic api. Expected {anthropic_api_input_tokens} but got {log_entry['prompt_tokens']}"
    assert (
        log_entry["completion_tokens"] == anthropic_api_output_tokens
    ), f"Should have completion tokens matching anthropic api. Expected {anthropic_api_output_tokens} but got {log_entry['completion_tokens']}"
    assert (
        log_entry["total_tokens"]
        == log_entry["prompt_tokens"] + log_entry["completion_tokens"]
    ), "Total tokens should equal prompt + completion"

    # Time assertions
    assert all(
        key in log_entry for key in ["startTime", "endTime", "completionStartTime"]
    ), "Should have all time fields"
    assert (
        log_entry["startTime"] < log_entry["endTime"]
    ), "Start time should be before end time"

    # Metadata assertions
    assert str(log_entry["cache_hit"]).lower() != "true", "Cache should be off"
    assert log_entry["request_tags"] == [
        "test-tag-stream-1",
        "test-tag-stream-2",
    ], "Tags should match input"
    assert (
        "user_api_key" in log_entry["metadata"]
    ), "Should have user API key in metadata"
    assert "claude" in log_entry["model"]
    assert log_entry["end_user"] == "test-user-1"
    assert log_entry["custom_llm_provider"] == "anthropic"