diff --git a/litellm/tests/test_logfire.py b/litellm/tests/test_logfire.py
new file mode 100644
index 0000000000..da1cb7bde8
--- /dev/null
+++ b/litellm/tests/test_logfire.py
@@ -0,0 +1,123 @@
+import sys
+import os
+import json
+import time
+
+sys.path.insert(0, os.path.abspath("../.."))
+
+import logfire
+import litellm
+import pytest
+from logfire.testing import TestExporter, SimpleSpanProcessor
+
+# Testing scenarios for logfire logging:
+# 1. Test logfire logging for completion
+# 2. Test logfire logging for acompletion
+# 3. Test logfire logging for completion while streaming is enabled
+# 4. Test logfire logging for acompletion while streaming is enabled
+
+
+@pytest.mark.parametrize("stream", [False, True])
+def test_completion_logfire_logging(stream):
+    litellm.success_callback = ["logfire"]
+    litellm.set_verbose = True
+
+    # Capture spans in memory instead of sending them to Logfire.
+    exporter = TestExporter()
+    logfire.configure(
+        send_to_logfire=False,
+        console=False,
+        processors=[SimpleSpanProcessor(exporter)],
+        collect_system_metrics=False,
+    )
+    messages = [{"role": "user", "content": "what llm are u"}]
+    temperature = 0.3
+    max_tokens = 10
+    response = litellm.completion(
+        model="gpt-3.5-turbo",
+        messages=messages,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        stream=stream,
+    )
+    print(response)
+
+    if stream:
+        for chunk in response:
+            print(chunk)
+
+    # Give the logfire callback time to export the span.
+    time.sleep(5)
+    exported_spans = exporter.exported_spans_as_dict()
+
+    assert len(exported_spans) == 1
+    assert (
+        exported_spans[0]["attributes"]["logfire.msg"]
+        == "Chat Completion with 'gpt-3.5-turbo'"
+    )
+
+    request_data = json.loads(exported_spans[0]["attributes"]["request_data"])
+
+    assert request_data["model"] == "gpt-3.5-turbo"
+    assert request_data["messages"] == messages
+
+    assert "completion_tokens" in request_data["usage"]
+    assert "prompt_tokens" in request_data["usage"]
+    assert "total_tokens" in request_data["usage"]
+    assert request_data["response"]["choices"][0]["message"]["content"]
+    assert request_data["modelParameters"]["max_tokens"] == max_tokens
+    assert request_data["modelParameters"]["temperature"] == temperature
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("stream", [False, True])
+async def test_acompletion_logfire_logging(stream):
+    litellm.success_callback = ["logfire"]
+    litellm.set_verbose = True
+
+    # Capture spans in memory instead of sending them to Logfire.
+    exporter = TestExporter()
+    logfire.configure(
+        send_to_logfire=False,
+        console=False,
+        processors=[SimpleSpanProcessor(exporter)],
+        collect_system_metrics=False,
+    )
+    messages = [{"role": "user", "content": "what llm are u"}]
+    temperature = 0.3
+    max_tokens = 10
+    response = await litellm.acompletion(
+        model="gpt-3.5-turbo",
+        messages=messages,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        stream=stream,
+    )
+    print(response)
+
+    if stream:
+        async for chunk in response:
+            print(chunk)
+
+    # Give the logfire callback time to export the span.
+    time.sleep(5)
+    exported_spans = exporter.exported_spans_as_dict()
+    print("exported_spans", exported_spans)
+
+    assert len(exported_spans) == 1
+    assert (
+        exported_spans[0]["attributes"]["logfire.msg"]
+        == "Chat Completion with 'gpt-3.5-turbo'"
+    )
+
+    request_data = json.loads(exported_spans[0]["attributes"]["request_data"])
+
+    assert request_data["model"] == "gpt-3.5-turbo"
+    assert request_data["messages"] == messages
+
+    assert "completion_tokens" in request_data["usage"]
+    assert "prompt_tokens" in request_data["usage"]
+    assert "total_tokens" in request_data["usage"]
+    assert request_data["response"]["choices"][0]["message"]["content"]
+    assert request_data["modelParameters"]["max_tokens"] == max_tokens
+    assert request_data["modelParameters"]["temperature"] == temperature