(Testing) - Add e2e testing for langfuse logging with tags (#7922)

* move langfuse tests

* fix test

* fix completion.json

* working test

* test completion with tags

* langfuse testing fixes

* faster logging testing

* pytest-xdist in testing

* fix langfuse testing flow

* fix testing flow

* fix config for logging tests

* fix langfuse completion with tags stream

* fix _verify_langfuse_call
Ishaan Jaff, 2025-01-22 09:09:25 -08:00 (committed via GitHub)
parent 0eec1d860e
commit 1f4ea88228
6 changed files with 492 additions and 1 deletion
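The change adds end-to-end coverage for the exact payload LiteLLM's Langfuse callback posts to the /api/public/ingestion endpoint, with and without tags: three golden JSON payloads under langfuse_expected_request_body/ and a new 218-line test module that replays requests against a mocked HTTP client. As a minimal sketch of the behavior under test (assuming Langfuse credentials are configured in the environment; the tests themselves mock the HTTP layer instead):

import litellm

litellm.success_callback = ["langfuse"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
    # tags passed via metadata are forwarded to the Langfuse trace
    metadata={"tags": ["test_tag", "test_tag_2"]},
)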


@@ -4,7 +4,7 @@ import json
 import logging
 import os
 import sys
-from typing import Any
+from typing import Any, Optional
 from unittest.mock import MagicMock, patch

 logging.basicConfig(level=logging.DEBUG)

Binary file not shown.

langfuse_expected_request_body/completion.json

@@ -0,0 +1,85 @@
{
"batch": [
{
"id": "7e00e081-468b-4fe9-a409-eb12ac7d3d2d",
"type": "trace-create",
"body": {
"id": "litellm-test-793c217f-9417-4e77-84a7-8dcc16e5b72b",
"timestamp": "2025-01-16T19:28:55.124873Z",
"name": "litellm-acompletion",
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"tags": []
},
"timestamp": "2025-01-16T19:28:55.125002Z"
},
{
"id": "b9ec2c0f-18df-46c7-9e90-624c60bf78ee",
"type": "generation-create",
"body": {
"name": "litellm-acompletion",
"startTime": "2025-01-16T11:28:54.796360-08:00",
"metadata": {
"hidden_params": {
"model_id": null,
"cache_key": null,
"api_base": "https://api.openai.com",
"response_cost": 5.4999999999999995e-05,
"additional_headers": {}
},
"litellm_response_cost": 5.4999999999999995e-05,
"cache_hit": false,
"requester_metadata": {}
},
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"level": "DEFAULT",
"id": "time-11-28-54-796360_chatcmpl-521e530f-5e29-4d0a-8d1a-58fca0a847c2",
"endTime": "2025-01-16T11:28:55.124353-08:00",
"completionStartTime": "2025-01-16T11:28:55.124353-08:00",
"model": "gpt-3.5-turbo",
"modelParameters": {
"extra_body": "{}"
},
"usage": {
"input": 10,
"output": 20,
"unit": "TOKENS",
"totalCost": 5.4999999999999995e-05
}
},
"timestamp": "2025-01-16T19:28:55.125258Z"
}
],
"metadata": {
"batch_size": 2,
"sdk_integration": "litellm",
"sdk_name": "python",
"sdk_version": "2.44.1",
"public_key": "pk-lf-03734ab3-8790-4c09-b5fb-8c3b663413b6"
}
}
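Each expected payload is a Langfuse batch-ingestion request: a trace-create event followed by a generation-create event, plus SDK metadata. A quick sanity check on the fixture above (a sketch; it assumes the file is read from the langfuse_expected_request_body directory the tests use):

import json

with open("langfuse_expected_request_body/completion.json") as f:
    payload = json.load(f)

# one trace event and one generation event per batch
assert [e["type"] for e in payload["batch"]] == ["trace-create", "generation-create"]
# the plain completion case carries no tags
assert payload["batch"][0]["body"]["tags"] == []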

langfuse_expected_request_body/completion_with_tags.json

@@ -0,0 +1,94 @@
{
"batch": [
{
"id": "42be960a-5dde-47df-9cbc-1fdd0fdcaa7d",
"type": "trace-create",
"body": {
"id": "litellm-test-f3ab679b-1e1d-43fd-9a9a-f11287aeb339",
"timestamp": "2025-01-22T15:31:28.963419Z",
"name": "litellm-acompletion",
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"tags": [
"test_tag",
"test_tag_2"
]
},
"timestamp": "2025-01-22T15:31:28.963706Z"
},
{
"id": "5486df5a-3776-4adf-abd0-bd22e51f7fb4",
"type": "generation-create",
"body": {
"traceId": "litellm-test-f3ab679b-1e1d-43fd-9a9a-f11287aeb339",
"name": "litellm-acompletion",
"startTime": "2025-01-22T07:31:28.960749-08:00",
"metadata": {
"tags": [
"test_tag",
"test_tag_2"
],
"hidden_params": {
"model_id": null,
"cache_key": null,
"api_base": "https://api.openai.com",
"response_cost": 5.4999999999999995e-05,
"additional_headers": {},
"litellm_overhead_time_ms": null
},
"litellm_response_cost": 5.4999999999999995e-05,
"cache_hit": false,
"requester_metadata": {}
},
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"level": "DEFAULT",
"id": "time-07-31-28-960749_chatcmpl-f06338f0-8c49-45d8-be35-2854a89723c1",
"endTime": "2025-01-22T07:31:28.962389-08:00",
"completionStartTime": "2025-01-22T07:31:28.962389-08:00",
"model": "gpt-3.5-turbo",
"modelParameters": {
"extra_body": "{}"
},
"usage": {
"input": 10,
"output": 20,
"unit": "TOKENS",
"totalCost": 5.4999999999999995e-05
}
},
"timestamp": "2025-01-22T15:31:28.964179Z"
}
],
"metadata": {
"batch_size": 2,
"sdk_integration": "litellm",
"sdk_name": "python",
"sdk_version": "2.44.1",
"public_key": "pk-lf-e02aaea3-8668-4c9f-8c69-771a4ea1f5c9"
}
}
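Note how tags surface in two places in this fixture: on the trace body's tags list and again under the generation's metadata.tags, with the generation's traceId linking it back to the trace. A sketch of that invariant, under the same file-layout assumption as above:

import json

with open("langfuse_expected_request_body/completion_with_tags.json") as f:
    payload = json.load(f)

trace, generation = payload["batch"]
assert trace["body"]["tags"] == ["test_tag", "test_tag_2"]
assert generation["body"]["metadata"]["tags"] == ["test_tag", "test_tag_2"]
# the generation event points back at its parent trace
assert generation["body"]["traceId"] == trace["body"]["id"]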

langfuse_expected_request_body/completion_with_tags_stream.json

@@ -0,0 +1,94 @@
{
"batch": [
{
"id": "06b8fa9f-151b-4e74-9fbf-8af5222a7f40",
"type": "trace-create",
"body": {
"id": "litellm-test-54368a51-a382-493c-b0a8-3f1af23e18c4",
"timestamp": "2025-01-22T16:38:26.016582Z",
"name": "litellm-acompletion",
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"tags": [
"test_tag_stream",
"test_tag_2_stream"
]
},
"timestamp": "2025-01-22T16:38:26.016828Z"
},
{
"id": "4ca1fd78-53e3-41b5-95d9-417b09e3f0eb",
"type": "generation-create",
"body": {
"traceId": "litellm-test-54368a51-a382-493c-b0a8-3f1af23e18c4",
"name": "litellm-acompletion",
"startTime": "2025-01-22T08:38:25.665692-08:00",
"metadata": {
"tags": [
"test_tag_stream",
"test_tag_2_stream"
],
"hidden_params": {
"model_id": null,
"cache_key": null,
"api_base": "https://api.openai.com",
"response_cost": 5.4999999999999995e-05,
"additional_headers": {},
"litellm_overhead_time_ms": null
},
"litellm_response_cost": 5.4999999999999995e-05,
"cache_hit": false,
"requester_metadata": {}
},
"input": {
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
},
"output": {
"content": "Hello! How can I assist you today?",
"role": "assistant",
"tool_calls": null,
"function_call": null
},
"level": "DEFAULT",
"id": "time-08-38-25-665692_chatcmpl-8b67ffb8-4326-4e1b-bf4a-f70930c11c00",
"endTime": "2025-01-22T08:38:26.015666-08:00",
"completionStartTime": "2025-01-22T08:38:26.015666-08:00",
"model": "gpt-3.5-turbo",
"modelParameters": {
"extra_body": "{}"
},
"usage": {
"input": 10,
"output": 20,
"unit": "TOKENS",
"totalCost": 5.4999999999999995e-05
}
},
"timestamp": "2025-01-22T16:38:26.017252Z"
}
],
"metadata": {
"batch_size": 2,
"sdk_integration": "litellm",
"sdk_name": "python",
"sdk_version": "2.44.1",
"public_key": "pk-lf-e02aaea3-8668-4c9f-8c69-771a4ea1f5c9"
}
}


@@ -0,0 +1,218 @@
import asyncio
import copy
import json
import logging
import os
import sys
from typing import Any, Optional
from unittest.mock import MagicMock, patch

logging.basicConfig(level=logging.DEBUG)

sys.path.insert(0, os.path.abspath("../.."))

import litellm
from litellm import completion
from litellm.caching import InMemoryCache

litellm.num_retries = 3
litellm.success_callback = ["langfuse"]
os.environ["LANGFUSE_DEBUG"] = "True"
import time

import pytest


def assert_langfuse_request_matches_expected(
actual_request_body: dict,
expected_file_name: str,
trace_id: Optional[str] = None,
):
"""
Helper function to compare actual Langfuse request body with expected JSON file.

    Args:
        actual_request_body (dict): The actual request body received from the API call
        expected_file_name (str): Name of the JSON file containing the expected request body (e.g., "completion.json")
        trace_id (Optional[str]): If provided, only batch events belonging to this trace are compared
    """
# Get the current directory and read the expected request body
pwd = os.path.dirname(os.path.realpath(__file__))
expected_body_path = os.path.join(
pwd, "langfuse_expected_request_body", expected_file_name
)
with open(expected_body_path, "r") as f:
expected_request_body = json.load(f)
# Filter out events that don't match the trace_id
if trace_id:
actual_request_body["batch"] = [
item
for item in actual_request_body["batch"]
if (item["type"] == "trace-create" and item["body"].get("id") == trace_id)
or (
item["type"] == "generation-create"
and item["body"].get("traceId") == trace_id
)
]
print(
"actual_request_body after filtering", json.dumps(actual_request_body, indent=4)
)
# Replace dynamic values in actual request body
for item in actual_request_body["batch"]:
# Replace IDs with expected IDs
if item["type"] == "trace-create":
item["id"] = expected_request_body["batch"][0]["id"]
item["body"]["id"] = expected_request_body["batch"][0]["body"]["id"]
item["timestamp"] = expected_request_body["batch"][0]["timestamp"]
item["body"]["timestamp"] = expected_request_body["batch"][0]["body"][
"timestamp"
]
elif item["type"] == "generation-create":
item["id"] = expected_request_body["batch"][1]["id"]
item["body"]["id"] = expected_request_body["batch"][1]["body"]["id"]
item["timestamp"] = expected_request_body["batch"][1]["timestamp"]
item["body"]["startTime"] = expected_request_body["batch"][1]["body"][
"startTime"
]
item["body"]["endTime"] = expected_request_body["batch"][1]["body"][
"endTime"
]
item["body"]["completionStartTime"] = expected_request_body["batch"][1][
"body"
]["completionStartTime"]
if trace_id is None:
print("popping traceId")
item["body"].pop("traceId")
else:
item["body"]["traceId"] = trace_id
expected_request_body["batch"][1]["body"]["traceId"] = trace_id
# Replace SDK version with expected version
actual_request_body["batch"][0]["body"].pop("release", None)
actual_request_body["metadata"]["sdk_version"] = expected_request_body["metadata"][
"sdk_version"
]
# replace "public_key" with expected public key
actual_request_body["metadata"]["public_key"] = expected_request_body["metadata"][
"public_key"
]
actual_request_body["batch"][1]["body"]["metadata"] = expected_request_body[
"batch"
][1]["body"]["metadata"]
actual_request_body["metadata"]["sdk_integration"] = expected_request_body[
"metadata"
]["sdk_integration"]
actual_request_body["metadata"]["batch_size"] = expected_request_body["metadata"][
"batch_size"
]
# Assert the entire request body matches
assert (
actual_request_body == expected_request_body
), f"Difference in request bodies: {json.dumps(actual_request_body, indent=2)} != {json.dumps(expected_request_body, indent=2)}"
class TestLangfuseLogging:
@pytest.fixture
async def mock_setup(self):
"""Common setup for Langfuse logging tests"""
import uuid
from unittest.mock import AsyncMock, patch
import httpx
# Create a mock Response object
mock_response = AsyncMock(spec=httpx.Response)
mock_response.status_code = 200
mock_response.json.return_value = {"status": "success"}
# Create mock for httpx.Client.post
mock_post = AsyncMock()
mock_post.return_value = mock_response
litellm.set_verbose = True
litellm.success_callback = ["langfuse"]
return {"trace_id": f"litellm-test-{str(uuid.uuid4())}", "mock_post": mock_post}
async def _verify_langfuse_call(
self,
mock_post,
expected_file_name: str,
trace_id: str,
):
"""Helper method to verify Langfuse API calls"""
await asyncio.sleep(3)
# Verify the call
assert mock_post.call_count >= 1
url = mock_post.call_args[0][0]
request_body = mock_post.call_args[1].get("content")
# Parse the JSON string into a dict for assertions
actual_request_body = json.loads(request_body)
print("\nMocked Request Details:")
print(f"URL: {url}")
print(f"Request Body: {json.dumps(actual_request_body, indent=4)}")
assert url == "https://us.cloud.langfuse.com/api/public/ingestion"
assert_langfuse_request_matches_expected(
actual_request_body,
expected_file_name,
trace_id,
        )

    @pytest.mark.asyncio
async def test_langfuse_logging_completion(self, mock_setup):
"""Test Langfuse logging for chat completion"""
setup = await mock_setup # Await the fixture
with patch("httpx.Client.post", setup["mock_post"]):
await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hello!"}],
mock_response="Hello! How can I assist you today?",
metadata={"trace_id": setup["trace_id"]},
)
await self._verify_langfuse_call(
setup["mock_post"], "completion.json", setup["trace_id"]
        )

    @pytest.mark.asyncio
async def test_langfuse_logging_completion_with_tags(self, mock_setup):
"""Test Langfuse logging for chat completion with tags"""
setup = await mock_setup # Await the fixture
with patch("httpx.Client.post", setup["mock_post"]):
await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hello!"}],
mock_response="Hello! How can I assist you today?",
metadata={
"trace_id": setup["trace_id"],
"tags": ["test_tag", "test_tag_2"],
},
)
await self._verify_langfuse_call(
setup["mock_post"], "completion_with_tags.json", setup["trace_id"]
        )

    @pytest.mark.asyncio
async def test_langfuse_logging_completion_with_tags_stream(self, mock_setup):
"""Test Langfuse logging for chat completion with tags"""
setup = await mock_setup # Await the fixture
with patch("httpx.Client.post", setup["mock_post"]):
await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hello!"}],
mock_response="Hello! How can I assist you today?",
metadata={
"trace_id": setup["trace_id"],
"tags": ["test_tag_stream", "test_tag_2_stream"],
},
)
await self._verify_langfuse_call(
setup["mock_post"],
"completion_with_tags_stream.json",
setup["trace_id"],
)
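
These tests stay offline because Langfuse's Python SDK sends its batches through httpx.Client.post, so patching that one call intercepts the payload without real credentials. A condensed sketch of the interception pattern used above (it assumes litellm and langfuse are installed; the 3-second sleep mirrors _verify_langfuse_call, giving the SDK's background worker time to flush):

import asyncio
import json
from unittest.mock import AsyncMock, patch

import httpx
import litellm

litellm.success_callback = ["langfuse"]


async def main():
    mock_post = AsyncMock(return_value=AsyncMock(spec=httpx.Response, status_code=200))
    with patch("httpx.Client.post", mock_post):
        await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello!"}],
            mock_response="Hello! How can I assist you today?",
            metadata={"tags": ["test_tag"]},
        )
        await asyncio.sleep(3)  # let the Langfuse batch worker flush
    # the raw JSON body arrives as the `content` kwarg, as asserted above
    print(json.dumps(json.loads(mock_post.call_args[1]["content"]), indent=2))


asyncio.run(main())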