From 5bada7cbced63506aa7903926a5af5170de7dc2c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 17 Oct 2024 16:32:56 +0530
Subject: [PATCH] fix otel tests

---
 .../test_otel_logging.py | 259 ++++++++++++++++++
 1 file changed, 259 insertions(+)
 create mode 100644 tests/logging_callback_tests/test_otel_logging.py

diff --git a/tests/logging_callback_tests/test_otel_logging.py b/tests/logging_callback_tests/test_otel_logging.py
new file mode 100644
index 000000000..c8aefa713
--- /dev/null
+++ b/tests/logging_callback_tests/test_otel_logging.py
@@ -0,0 +1,259 @@
+import json
+import os
+import sys
+from datetime import datetime
+from unittest.mock import AsyncMock
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system-path
+
+import pytest
+import litellm
+from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig, Span
+import asyncio
+import logging
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+from litellm._logging import verbose_logger
+
+
+verbose_logger.setLevel(logging.DEBUG)
+
+EXPECTED_SPAN_NAMES = ["litellm_request", "raw_gen_ai_request"]
+exporter = InMemorySpanExporter()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("streaming", [True, False])
+async def test_async_otel_callback(streaming):
+    litellm.set_verbose = True
+
+    litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))]
+
+    response = await litellm.acompletion(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "hi"}],
+        temperature=0.1,
+        user="OTEL_USER",
+        stream=streaming,
+    )
+
+    if streaming is True:
+        async for chunk in response:
+            print("chunk", chunk)
+
+    # give the async OTEL logger time to export spans to the in-memory exporter
+    await asyncio.sleep(4)
+    spans = exporter.get_finished_spans()
+    print("spans", spans)
+    assert len(spans) == 2
+
+    _span_names = [span.name for span in spans]
+    print("recorded span names", _span_names)
+    assert set(_span_names) == set(EXPECTED_SPAN_NAMES)
+
+    # print the value of a span
+    for span in spans:
+        print("span name", span.name)
+        print("span attributes", span.attributes)
+
+        if span.name == "litellm_request":
+            validate_litellm_request(span)
+            # Additional specific checks
+            assert span._attributes["gen_ai.request.model"] == "gpt-3.5-turbo"
+            assert span._attributes["gen_ai.system"] == "openai"
+            assert span._attributes["gen_ai.request.temperature"] == 0.1
+            assert span._attributes["llm.is_streaming"] == str(streaming)
+            assert span._attributes["llm.user"] == "OTEL_USER"
+        elif span.name == "raw_gen_ai_request":
+            if streaming is True:
+                validate_raw_gen_ai_request_openai_streaming(span)
+            else:
+                validate_raw_gen_ai_request_openai_non_streaming(span)
+
+    # clear in memory exporter
+    exporter.clear()
+
+
+def validate_litellm_request(span):
+    expected_attributes = [
+        "gen_ai.request.model",
+        "gen_ai.system",
+        "gen_ai.request.temperature",
+        "llm.is_streaming",
+        "llm.user",
+        "gen_ai.response.id",
+        "gen_ai.response.model",
+        "llm.usage.total_tokens",
+        "gen_ai.usage.completion_tokens",
+        "gen_ai.usage.prompt_tokens",
+    ]
+
+    # print all of the span attributes
+    print("span attributes", span._attributes)
+
+    for attr in expected_attributes:
+        value = span._attributes[attr]
+        print("value", value)
+        assert value is not None, f"Attribute {attr} has None value"
+
+
+def validate_raw_gen_ai_request_openai_non_streaming(span):
+    expected_attributes = [
+        "llm.openai.messages",
+        "llm.openai.temperature",
+        "llm.openai.user",
+        "llm.openai.extra_body",
"llm.openai.id", + "llm.openai.choices", + "llm.openai.created", + "llm.openai.model", + "llm.openai.object", + "llm.openai.service_tier", + "llm.openai.system_fingerprint", + "llm.openai.usage", + ] + + print("span attributes", span._attributes) + for attr in span._attributes: + print(attr) + + for attr in expected_attributes: + assert span._attributes[attr] is not None, f"Attribute {attr} has None" + + +def validate_raw_gen_ai_request_openai_streaming(span): + expected_attributes = [ + "llm.openai.messages", + "llm.openai.temperature", + "llm.openai.user", + "llm.openai.extra_body", + "llm.openai.model", + ] + + print("span attributes", span._attributes) + for attr in span._attributes: + print(attr) + + for attr in expected_attributes: + assert span._attributes[attr] is not None, f"Attribute {attr} has None" + + +@pytest.mark.parametrize( + "model", + ["anthropic/claude-3-opus-20240229"], +) +def test_completion_claude_3_function_call_with_otel(model): + litellm.set_verbose = True + + litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))] + tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + }, + } + ] + messages = [ + { + "role": "user", + "content": "What's the weather like in Boston today in Fahrenheit?", + } + ] + try: + # test without max tokens + response = litellm.completion( + model=model, + messages=messages, + tools=tools, + tool_choice={ + "type": "function", + "function": {"name": "get_current_weather"}, + }, + drop_params=True, + ) + + print("response from LiteLLM", response) + + except Exception as e: + pytest.fail(f"Error occurred: {e}") + finally: + # clear in memory exporter + exporter.clear() + + +@pytest.mark.asyncio +@pytest.mark.parametrize("streaming", [True, False]) +@pytest.mark.parametrize("global_redact", [True, False]) +async def test_awesome_otel_with_message_logging_off(streaming, global_redact): + litellm.set_verbose = True + litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))] + if global_redact is False: + otel_logger = OpenTelemetry( + message_logging=False, config=OpenTelemetryConfig(exporter="console") + ) + else: + # use global redaction + litellm.turn_off_message_logging = True + otel_logger = OpenTelemetry(config=OpenTelemetryConfig(exporter="console")) + + litellm.callbacks = [otel_logger] + litellm.success_callback = [] + litellm.failure_callback = [] + + response = await litellm.acompletion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "hi"}], + mock_response="hi", + stream=streaming, + ) + print("response", response) + + if streaming is True: + async for chunk in response: + print("chunk", chunk) + + await asyncio.sleep(1) + spans = exporter.get_finished_spans() + print("spans", spans) + assert len(spans) == 1 + + _span = spans[0] + print("span attributes", _span.attributes) + + validate_redacted_message_span_attributes(_span) + + # clear in memory exporter + exporter.clear() + + if global_redact is True: + litellm.turn_off_message_logging = False + + +def validate_redacted_message_span_attributes(span): + expected_attributes = [ + "gen_ai.request.model", + "gen_ai.system", + "llm.is_streaming", + 
"gen_ai.response.id", + "gen_ai.response.model", + "llm.usage.total_tokens", + "gen_ai.usage.completion_tokens", + "gen_ai.usage.prompt_tokens", + ] + + pass