(Testing) - Add e2e testing for langfuse logging with tags (#7922)
All checks were successful
Read Version from pyproject.toml / read-version (push) Successful in 14s

* move langfuse tests

* fix test

* fix completion.json

* working test

* test completion with tags

* langfuse testing fixes

* faster logging testing

* pytest-xdist in testing

* fix langfuse testing flow

* fix testing flow

* fix config for logging tests

* fix langfuse completion with tags stream

* fix _verify_langfuse_call
This commit is contained in:
Ishaan Jaff 2025-01-22 09:09:25 -08:00 committed by GitHub
parent 0eec1d860e
commit 1f4ea88228
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 492 additions and 1 deletions

View file

@@ -4,7 +4,7 @@ import json
import logging
import os
import sys
from typing import Any
from typing import Any, Optional
from unittest.mock import MagicMock, patch
logging.basicConfig(level=logging.DEBUG)

Binary file not shown.

View file

@@ -0,0 +1,85 @@
{
"batch": [
{
"id": "7e00e081-468b-4fe9-a409-eb12ac7d3d2d",
"type": "trace-create",
"body": {
"id": "litellm-test-793c217f-9417-4e77-84a7-8dcc16e5b72b",
"timestamp": "2025-01-16T19:28:55.124873Z",
"name": "litellm-acompletion",
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"tags": []
},
"timestamp": "2025-01-16T19:28:55.125002Z"
},
{
"id": "b9ec2c0f-18df-46c7-9e90-624c60bf78ee",
"type": "generation-create",
"body": {
"name": "litellm-acompletion",
"startTime": "2025-01-16T11:28:54.796360-08:00",
"metadata": {
"hidden_params": {
"model_id": null,
"cache_key": null,
"api_base": "https://api.openai.com",
"response_cost": 5.4999999999999995e-05,
"additional_headers": {}
},
"litellm_response_cost": 5.4999999999999995e-05,
"cache_hit": false,
"requester_metadata": {}
},
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"level": "DEFAULT",
"id": "time-11-28-54-796360_chatcmpl-521e530f-5e29-4d0a-8d1a-58fca0a847c2",
"endTime": "2025-01-16T11:28:55.124353-08:00",
"completionStartTime": "2025-01-16T11:28:55.124353-08:00",
"model": "gpt-3.5-turbo",
"modelParameters": {
"extra_body": "{}"
},
"usage": {
"input": 10,
"output": 20,
"unit": "TOKENS",
"totalCost": 5.4999999999999995e-05
}
},
"timestamp": "2025-01-16T19:28:55.125258Z"
}
],
"metadata": {
"batch_size": 2,
"sdk_integration": "litellm",
"sdk_name": "python",
"sdk_version": "2.44.1",
"public_key": "pk-lf-03734ab3-8790-4c09-b5fb-8c3b663413b6"
}
}

View file

@@ -0,0 +1,94 @@
{
"batch": [
{
"id": "42be960a-5dde-47df-9cbc-1fdd0fdcaa7d",
"type": "trace-create",
"body": {
"id": "litellm-test-f3ab679b-1e1d-43fd-9a9a-f11287aeb339",
"timestamp": "2025-01-22T15:31:28.963419Z",
"name": "litellm-acompletion",
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"tags": [
"test_tag",
"test_tag_2"
]
},
"timestamp": "2025-01-22T15:31:28.963706Z"
},
{
"id": "5486df5a-3776-4adf-abd0-bd22e51f7fb4",
"type": "generation-create",
"body": {
"traceId": "litellm-test-f3ab679b-1e1d-43fd-9a9a-f11287aeb339",
"name": "litellm-acompletion",
"startTime": "2025-01-22T07:31:28.960749-08:00",
"metadata": {
"tags": [
"test_tag",
"test_tag_2"
],
"hidden_params": {
"model_id": null,
"cache_key": null,
"api_base": "https://api.openai.com",
"response_cost": 5.4999999999999995e-05,
"additional_headers": {},
"litellm_overhead_time_ms": null
},
"litellm_response_cost": 5.4999999999999995e-05,
"cache_hit": false,
"requester_metadata": {}
},
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"level": "DEFAULT",
"id": "time-07-31-28-960749_chatcmpl-f06338f0-8c49-45d8-be35-2854a89723c1",
"endTime": "2025-01-22T07:31:28.962389-08:00",
"completionStartTime": "2025-01-22T07:31:28.962389-08:00",
"model": "gpt-3.5-turbo",
"modelParameters": {
"extra_body": "{}"
},
"usage": {
"input": 10,
"output": 20,
"unit": "TOKENS",
"totalCost": 5.4999999999999995e-05
}
},
"timestamp": "2025-01-22T15:31:28.964179Z"
}
],
"metadata": {
"batch_size": 2,
"sdk_integration": "litellm",
"sdk_name": "python",
"sdk_version": "2.44.1",
"public_key": "pk-lf-e02aaea3-8668-4c9f-8c69-771a4ea1f5c9"
}
}

View file

@@ -0,0 +1,94 @@
{
"batch": [
{
"id": "06b8fa9f-151b-4e74-9fbf-8af5222a7f40",
"type": "trace-create",
"body": {
"id": "litellm-test-54368a51-a382-493c-b0a8-3f1af23e18c4",
"timestamp": "2025-01-22T16:38:26.016582Z",
"name": "litellm-acompletion",
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"tags": [
"test_tag_stream",
"test_tag_2_stream"
]
},
"timestamp": "2025-01-22T16:38:26.016828Z"
},
{
"id": "4ca1fd78-53e3-41b5-95d9-417b09e3f0eb",
"type": "generation-create",
"body": {
"traceId": "litellm-test-54368a51-a382-493c-b0a8-3f1af23e18c4",
"name": "litellm-acompletion",
"startTime": "2025-01-22T08:38:25.665692-08:00",
"metadata": {
"tags": [
"test_tag_stream",
"test_tag_2_stream"
],
"hidden_params": {
"model_id": null,
"cache_key": null,
"api_base": "https://api.openai.com",
"response_cost": 5.4999999999999995e-05,
"additional_headers": {},
"litellm_overhead_time_ms": null
},
"litellm_response_cost": 5.4999999999999995e-05,
"cache_hit": false,
"requester_metadata": {}
},
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"level": "DEFAULT",
"id": "time-08-38-25-665692_chatcmpl-8b67ffb8-4326-4e1b-bf4a-f70930c11c00",
"endTime": "2025-01-22T08:38:26.015666-08:00",
"completionStartTime": "2025-01-22T08:38:26.015666-08:00",
"model": "gpt-3.5-turbo",
"modelParameters": {
"extra_body": "{}"
},
"usage": {
"input": 10,
"output": 20,
"unit": "TOKENS",
"totalCost": 5.4999999999999995e-05
}
},
"timestamp": "2025-01-22T16:38:26.017252Z"
}
],
"metadata": {
"batch_size": 2,
"sdk_integration": "litellm",
"sdk_name": "python",
"sdk_version": "2.44.1",
"public_key": "pk-lf-e02aaea3-8668-4c9f-8c69-771a4ea1f5c9"
}
}

View file

@@ -0,0 +1,218 @@
import asyncio
import copy
import json
import logging
import os
import sys
from typing import Any, Optional
from unittest.mock import MagicMock, patch

# Verbose logging so Langfuse SDK debug output is visible in test failures.
logging.basicConfig(level=logging.DEBUG)
# Make the local litellm checkout importable.
# NOTE(review): path is relative to the current working directory, not this
# file — confirm tests are always invoked from the tests directory.
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm import completion
# NOTE(review): `completion` and `InMemoryCache` are imported but unused in
# this file — candidates for removal.
from litellm.caching import InMemoryCache

# Retry transient failures; route success events to the Langfuse logger.
litellm.num_retries = 3
litellm.success_callback = ["langfuse"]
# Enables extra debug output from the Langfuse integration.
os.environ["LANGFUSE_DEBUG"] = "True"
import time
import pytest
def assert_langfuse_request_matches_expected(
    actual_request_body: dict,
    expected_file_name: str,
    trace_id: Optional[str] = None,
):
    """
    Compare an actual Langfuse ingestion request body against a stored fixture.

    Dynamic values (event ids, timestamps, start/end times, SDK version,
    public key, generation metadata) in ``actual_request_body`` are
    overwritten with the fixture's values before comparison, so only the
    stable parts of the payload are asserted.

    NOTE: mutates ``actual_request_body`` in place.

    Args:
        actual_request_body (dict): The actual request body received from the API call
        expected_file_name (str): Name of the JSON file containing expected request body (e.g., "completion.json")
        trace_id (Optional[str]): When set, batch events belonging to other
            traces are filtered out first, and the generation's ``traceId``
            is pinned to this value on both sides of the comparison.

    Raises:
        AssertionError: If the normalized bodies differ, or the filtered
            batch does not contain the expected trace/generation event pair.
    """
    # Get the current directory and read the expected request body
    pwd = os.path.dirname(os.path.realpath(__file__))
    expected_body_path = os.path.join(
        pwd, "langfuse_expected_request_body", expected_file_name
    )
    with open(expected_body_path, "r") as f:
        expected_request_body = json.load(f)
    # Filter out events that don't match the trace_id; concurrently running
    # tests may have flushed unrelated events in the same batch.
    if trace_id:
        actual_request_body["batch"] = [
            item
            for item in actual_request_body["batch"]
            if (item["type"] == "trace-create" and item["body"].get("id") == trace_id)
            or (
                item["type"] == "generation-create"
                and item["body"].get("traceId") == trace_id
            )
        ]
    print(
        "actual_request_body after filtering", json.dumps(actual_request_body, indent=4)
    )
    # The normalization below assumes the canonical [trace-create,
    # generation-create] event pair; fail with a readable message instead of
    # an opaque IndexError when fewer events were captured.
    assert (
        len(actual_request_body["batch"]) >= 2
    ), f"Expected at least 2 batch events for trace_id={trace_id}, got {json.dumps(actual_request_body['batch'], indent=2)}"
    # Replace dynamic values in actual request body with the fixture's values
    for item in actual_request_body["batch"]:
        if item["type"] == "trace-create":
            item["id"] = expected_request_body["batch"][0]["id"]
            item["body"]["id"] = expected_request_body["batch"][0]["body"]["id"]
            item["timestamp"] = expected_request_body["batch"][0]["timestamp"]
            item["body"]["timestamp"] = expected_request_body["batch"][0]["body"][
                "timestamp"
            ]
        elif item["type"] == "generation-create":
            item["id"] = expected_request_body["batch"][1]["id"]
            item["body"]["id"] = expected_request_body["batch"][1]["body"]["id"]
            item["timestamp"] = expected_request_body["batch"][1]["timestamp"]
            item["body"]["startTime"] = expected_request_body["batch"][1]["body"][
                "startTime"
            ]
            item["body"]["endTime"] = expected_request_body["batch"][1]["body"][
                "endTime"
            ]
            item["body"]["completionStartTime"] = expected_request_body["batch"][1][
                "body"
            ]["completionStartTime"]
            if trace_id is None:
                # Without a known trace id the trace linkage is unverifiable.
                print("popping traceId")
                item["body"].pop("traceId")
            else:
                item["body"]["traceId"] = trace_id
                expected_request_body["batch"][1]["body"]["traceId"] = trace_id
    # Normalize environment-dependent metadata (release, SDK version, keys).
    actual_request_body["batch"][0]["body"].pop("release", None)
    actual_request_body["metadata"]["sdk_version"] = expected_request_body["metadata"][
        "sdk_version"
    ]
    # replace "public_key" with expected public key
    actual_request_body["metadata"]["public_key"] = expected_request_body["metadata"][
        "public_key"
    ]
    actual_request_body["batch"][1]["body"]["metadata"] = expected_request_body[
        "batch"
    ][1]["body"]["metadata"]
    actual_request_body["metadata"]["sdk_integration"] = expected_request_body[
        "metadata"
    ]["sdk_integration"]
    actual_request_body["metadata"]["batch_size"] = expected_request_body["metadata"][
        "batch_size"
    ]
    # Assert the entire request body matches
    assert (
        actual_request_body == expected_request_body
    ), f"Difference in request bodies: {json.dumps(actual_request_body, indent=2)} != {json.dumps(expected_request_body, indent=2)}"
class TestLangfuseLogging:
    @pytest.fixture
    async def mock_setup(self):
        """Common setup for Langfuse logging tests"""
        import uuid
        from unittest.mock import AsyncMock, patch

        import httpx

        # Fake the HTTP layer so no request ever leaves the test process.
        fake_response = AsyncMock(spec=httpx.Response)
        fake_response.status_code = 200
        fake_response.json.return_value = {"status": "success"}

        fake_post = AsyncMock()
        fake_post.return_value = fake_response

        litellm.set_verbose = True
        litellm.success_callback = ["langfuse"]

        return {"trace_id": f"litellm-test-{str(uuid.uuid4())}", "mock_post": fake_post}

    async def _verify_langfuse_call(
        self,
        mock_post,
        expected_file_name: str,
        trace_id: str,
    ):
        """Helper method to verify Langfuse API calls"""
        # The Langfuse logger flushes its batch in the background; give it time.
        await asyncio.sleep(3)

        assert mock_post.call_count >= 1
        called_url = mock_post.call_args[0][0]
        raw_content = mock_post.call_args[1].get("content")
        # Parse the captured JSON string so we can assert on its structure.
        parsed_body = json.loads(raw_content)

        print("\nMocked Request Details:")
        print(f"URL: {called_url}")
        print(f"Request Body: {json.dumps(parsed_body, indent=4)}")

        assert called_url == "https://us.cloud.langfuse.com/api/public/ingestion"
        assert_langfuse_request_matches_expected(
            parsed_body,
            expected_file_name,
            trace_id,
        )

    async def _complete_and_verify(self, setup, request_metadata, expected_file_name):
        # Shared flow for every test below: run one mocked acompletion call,
        # then verify the payload the Langfuse logger posted.
        with patch("httpx.Client.post", setup["mock_post"]):
            await litellm.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "Hello!"}],
                mock_response="Hello! How can I assist you today?",
                metadata=request_metadata,
            )
            await self._verify_langfuse_call(
                setup["mock_post"], expected_file_name, setup["trace_id"]
            )

    @pytest.mark.asyncio
    async def test_langfuse_logging_completion(self, mock_setup):
        """Test Langfuse logging for chat completion"""
        setup = await mock_setup  # Await the fixture
        await self._complete_and_verify(
            setup, {"trace_id": setup["trace_id"]}, "completion.json"
        )

    @pytest.mark.asyncio
    async def test_langfuse_logging_completion_with_tags(self, mock_setup):
        """Test Langfuse logging for chat completion with tags"""
        setup = await mock_setup  # Await the fixture
        await self._complete_and_verify(
            setup,
            {"trace_id": setup["trace_id"], "tags": ["test_tag", "test_tag_2"]},
            "completion_with_tags.json",
        )

    @pytest.mark.asyncio
    async def test_langfuse_logging_completion_with_tags_stream(self, mock_setup):
        """Test Langfuse logging for chat completion with tags"""
        # NOTE(review): despite the "_stream" name, no stream=True is passed;
        # the expected fixture appears recorded from a non-streaming call —
        # confirm whether streaming coverage was intended here.
        setup = await mock_setup  # Await the fixture
        await self._complete_and_verify(
            setup,
            {
                "trace_id": setup["trace_id"],
                "tags": ["test_tag_stream", "test_tag_2_stream"],
            },
            "completion_with_tags_stream.json",
        )