# litellm-mirror/litellm/tests/test_logfire.py
import asyncio
import json
import os
import sys
import time

# Put the repo root on the path before importing litellm, so the local
# package is picked up instead of an installed copy
sys.path.insert(0, os.path.abspath("../.."))

import litellm
import logfire
import pytest
from logfire.testing import SimpleSpanProcessor, TestExporter

# Testing scenarios for logfire logging:
# 1. Test logfire logging for completion
# 2. Test logfire logging for acompletion
# 3. Test logfire logging for completion while streaming is enabled
# 4. Test logfire logging for acompletion while streaming is enabled


@pytest.mark.parametrize("stream", [False, True])
def test_completion_logfire_logging(stream):
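    # Register logfire as a litellm success callback and capture spans with an
    # in-memory exporter, so nothing is sent to the Logfire backend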
    litellm.success_callback = ["logfire"]
    litellm.set_verbose = True

    exporter = TestExporter()
    logfire.configure(
        send_to_logfire=False,
        console=False,
        processors=[SimpleSpanProcessor(exporter)],
        collect_system_metrics=False,
    )

    messages = [{"role": "user", "content": "what llm are u"}]
    temperature = 0.3
    max_tokens = 10
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        stream=stream,
    )
    print(response)
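    # A streaming response must be fully consumed before the success callback
    # fires with the final usage data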
    if stream:
        for chunk in response:
            print(chunk)

    # Give the callback time to export the span
    time.sleep(5)
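
    # Exactly one "Chat Completion" span should have been exported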
    exported_spans = exporter.exported_spans_as_dict()
    assert len(exported_spans) == 1
    assert (
        exported_spans[0]["attributes"]["logfire.msg"]
        == "Chat Completion with 'gpt-3.5-turbo'"
    )

    request_data = json.loads(exported_spans[0]["attributes"]["request_data"])
    assert request_data["model"] == "gpt-3.5-turbo"
    assert request_data["messages"] == messages
    assert "completion_tokens" in request_data["usage"]
    assert "prompt_tokens" in request_data["usage"]
    assert "total_tokens" in request_data["usage"]
    assert request_data["response"]["choices"][0]["message"]["content"]
    assert request_data["modelParameters"]["max_tokens"] == max_tokens
    assert request_data["modelParameters"]["temperature"] == temperature


@pytest.mark.asyncio
@pytest.mark.parametrize("stream", [False, True])
async def test_acompletion_logfire_logging(stream):
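    # Same setup as the sync test: capture spans in-memory, send nothing to Logfire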
    litellm.success_callback = ["logfire"]
    litellm.set_verbose = True

    exporter = TestExporter()
    logfire.configure(
        send_to_logfire=False,
        console=False,
        processors=[SimpleSpanProcessor(exporter)],
        collect_system_metrics=False,
    )

    messages = [{"role": "user", "content": "what llm are u"}]
    temperature = 0.3
    max_tokens = 10
    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        stream=stream,
    )
    print(response)
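    # acompletion with stream=True returns an async iterator, so it has to be
    # consumed with `async for`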
    if stream:
        async for chunk in response:
            print(chunk)

    # Give the callback time to export the span
    await asyncio.sleep(5)
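
    # Verify the exported span and its request/response attributes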
    exported_spans = exporter.exported_spans_as_dict()
    print("exported_spans", exported_spans)
    assert len(exported_spans) == 1
    assert (
        exported_spans[0]["attributes"]["logfire.msg"]
        == "Chat Completion with 'gpt-3.5-turbo'"
    )

    request_data = json.loads(exported_spans[0]["attributes"]["request_data"])
    assert request_data["model"] == "gpt-3.5-turbo"
    assert request_data["messages"] == messages
    assert "completion_tokens" in request_data["usage"]
    assert "prompt_tokens" in request_data["usage"]
    assert "total_tokens" in request_data["usage"]
    assert request_data["response"]["choices"][0]["message"]["content"]
    assert request_data["modelParameters"]["max_tokens"] == max_tokens
    assert request_data["modelParameters"]["temperature"] == temperature