Merge pull request #4078 from BerriAI/litellm_low_raw_request_response_otel

[FEAT]- OTEL Log raw LLM request/response on OTEL
2025-04-27 03:34:10 +00:00 · 2024-06-08 19:49:28 -07:00 · 2024-06-08 19:49:28 -07:00 · 8c5802d506
commit 8c5802d506
parent 0d83ffb4e2 6f40fc6a81
3 changed files with 257 additions and 80 deletions
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@ -1,6 +1,7 @@
 import os
 from dataclasses import dataclass
 from datetime import datetime
 import litellm
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_logger
@ -22,6 +23,8 @@ LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
 LITELLM_RESOURCE = {
    "service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
 }
 RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
 LITELLM_REQUEST_SPAN_NAME = "litellm_request"
@dataclass
@ -194,6 +197,7 @@ class OpenTelemetry(CustomLogger):
    def _handle_sucess(self, kwargs, response_obj, start_time, end_time):
        from opentelemetry.trace import Status, StatusCode
        from opentelemetry import trace
        verbose_logger.debug(
            "OpenTelemetry Logger: Logging kwargs: %s, OTEL config settings=%s",
@ -202,6 +206,7 @@ class OpenTelemetry(CustomLogger):
        )
        _parent_context, parent_otel_span = self._get_span_context(kwargs)
        # Span 1: Request sent to litellm SDK
        span = self.tracer.start_span(
            name=self._get_span_name(kwargs),
            start_time=self._to_ns(start_time),
@ -209,7 +214,23 @@ class OpenTelemetry(CustomLogger):
        )
        span.set_status(Status(StatusCode.OK))
        self.set_attributes(span, kwargs, response_obj)
        if litellm.turn_off_message_logging is True:
            pass
        else:
            # Span 2: Raw Request / Response to LLM
            raw_request_span = self.tracer.start_span(
                name=RAW_REQUEST_SPAN_NAME,
                start_time=self._to_ns(start_time),
                context=trace.set_span_in_context(span),
            )
            raw_request_span.set_status(Status(StatusCode.OK))
            self.set_raw_request_attributes(raw_request_span, kwargs, response_obj)
            raw_request_span.end(end_time=self._to_ns(end_time))
        span.end(end_time=self._to_ns(end_time))
        if parent_otel_span is not None:
            parent_otel_span.end(end_time=self._to_ns(datetime.now()))
@ -225,6 +246,31 @@ class OpenTelemetry(CustomLogger):
        self.set_attributes(span, kwargs, response_obj)
        span.end(end_time=self._to_ns(end_time))
    def set_tools_attributes(self, span: Span, tools):
        """Record the function-type tool definitions of a request on ``span``.

        For every entry of ``tools`` that carries a ``"function"`` mapping,
        the function's ``name``, ``description`` and JSON-encoded
        ``parameters`` are written under
        ``{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{index}.*``, where ``index``
        is the tool's position in ``tools``. Entries without a ``"function"``
        value are skipped, and so are individual fields that are absent.

        Args:
            span: Span that receives the attributes.
            tools: Iterable of tool definitions; each entry is accessed with
                ``.get("function")``. A falsy value makes this a no-op.

        Returns:
            None. Any exception raised while reading ``tools`` or writing the
            attributes is logged through ``verbose_logger`` and swallowed, so
            telemetry problems never propagate to the caller.
        """
        from opentelemetry.semconv.ai import SpanAttributes
        import json

        if not tools:
            return

        try:
            for i, tool in enumerate(tools):
                function = tool.get("function")
                if not function:
                    continue

                prefix = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}"

                # Only write fields that are actually present: ``None`` is
                # not a valid span attribute value, and encoding a missing
                # ``parameters`` would record the misleading string "null".
                name = function.get("name")
                if name is not None:
                    span.set_attribute(f"{prefix}.name", name)

                description = function.get("description")
                if description is not None:
                    span.set_attribute(f"{prefix}.description", description)

                parameters = function.get("parameters")
                if parameters is not None:
                    span.set_attribute(
                        f"{prefix}.parameters", json.dumps(parameters)
                    )
        except Exception as e:
            # Best-effort logging: a malformed tool definition must not
            # break the request that is being traced.
            verbose_logger.error(
                "OpenTelemetry: Error setting tools attributes: %s", str(e)
            )
    def set_attributes(self, span: Span, kwargs, response_obj):
        from opentelemetry.semconv.ai import SpanAttributes
@ -239,7 +285,8 @@ class OpenTelemetry(CustomLogger):
        #############################################
        # The name of the LLM a request is being made to
-        span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model"))
+        if kwargs.get("model"):
            span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model"))
        # The Generative AI Provider: Azure, OpenAI, etc.
        span.set_attribute(
@ -248,64 +295,99 @@ class OpenTelemetry(CustomLogger):
        )
        # The maximum number of tokens the LLM generates for a request.
-        span.set_attribute(
+        if optional_params.get("max_tokens"):
-            SpanAttributes.LLM_REQUEST_MAX_TOKENS, optional_params.get("max_tokens")
+            span.set_attribute(
-        )
+                SpanAttributes.LLM_REQUEST_MAX_TOKENS, optional_params.get("max_tokens")
            )
        # The temperature setting for the LLM request.
-        span.set_attribute(
+        if optional_params.get("temperature"):
-            SpanAttributes.LLM_REQUEST_TEMPERATURE, optional_params.get("temperature")
+            span.set_attribute(
-        )
+                SpanAttributes.LLM_REQUEST_TEMPERATURE,
                optional_params.get("temperature"),
            )
        # The top_p sampling setting for the LLM request.
-        span.set_attribute(
+        if optional_params.get("top_p"):
            SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
        )
        span.set_attribute(
            SpanAttributes.LLM_IS_STREAMING, optional_params.get("stream")
        )
        span.set_attribute(
            SpanAttributes.LLM_REQUEST_FUNCTIONS,
            optional_params.get("tools"),
        )
        span.set_attribute(SpanAttributes.LLM_USER, optional_params.get("user"))
        for idx, prompt in enumerate(kwargs.get("messages")):
            span.set_attribute(
-                f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
+                SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
                prompt.get("role"),
            )
            span.set_attribute(
                f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
                prompt.get("content"),
            )
        span.set_attribute(
            SpanAttributes.LLM_IS_STREAMING, optional_params.get("stream", False)
        )
        if optional_params.get("tools"):
            tools = optional_params["tools"]
            self.set_tools_attributes(span, tools)
        if optional_params.get("user"):
            span.set_attribute(SpanAttributes.LLM_USER, optional_params.get("user"))
        if kwargs.get("messages"):
            for idx, prompt in enumerate(kwargs.get("messages")):
                if prompt.get("role"):
                    span.set_attribute(
                        f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
                        prompt.get("role"),
                    )
                if prompt.get("content"):
                    if not isinstance(prompt.get("content"), str):
                        prompt["content"] = str(prompt.get("content"))
                    span.set_attribute(
                        f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
                        prompt.get("content"),
                    )
        #############################################
        ########## LLM Response Attributes ##########
        #############################################
        if response_obj.get("choices"):
            for idx, choice in enumerate(response_obj.get("choices")):
                if choice.get("finish_reason"):
                    span.set_attribute(
                        f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
                        choice.get("finish_reason"),
                    )
                if choice.get("message"):
                    if choice.get("message").get("role"):
                        span.set_attribute(
                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
                            choice.get("message").get("role"),
                        )
                    if choice.get("message").get("content"):
                        if not isinstance(choice.get("message").get("content"), str):
                            choice["message"]["content"] = str(
                                choice.get("message").get("content")
                            )
                        span.set_attribute(
                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
                            choice.get("message").get("content"),
                        )
-        for idx, choice in enumerate(response_obj.get("choices")):
+                    message = choice.get("message")
-            span.set_attribute(
+                    if not isinstance(message, dict):
-                f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
+                        message = message.dict()
-                choice.get("finish_reason"),
+                    tool_calls = message.get("tool_calls")
-            )
+                    if tool_calls:
-            span.set_attribute(
+                        span.set_attribute(
-                f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
+                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.function_call.name",
-                choice.get("message").get("role"),
+                            tool_calls[0].get("function").get("name"),
-            )
+                        )
-            span.set_attribute(
+                        span.set_attribute(
-                f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
+                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.function_call.arguments",
-                choice.get("message").get("content"),
+                            tool_calls[0].get("function").get("arguments"),
-            )
+                        )
        # The unique identifier for the completion.
-        span.set_attribute("gen_ai.response.id", response_obj.get("id"))
+        if response_obj.get("id"):
            span.set_attribute("gen_ai.response.id", response_obj.get("id"))
        # The model used to generate the response.
-        span.set_attribute(SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model"))
+        if response_obj.get("model"):
            span.set_attribute(
                SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
            )
        usage = response_obj.get("usage")
        if usage:
@ -326,11 +408,53 @@ class OpenTelemetry(CustomLogger):
                usage.get("prompt_tokens"),
            )
    def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
        """Copy the raw provider request and response onto ``span``.

        Request side: every key of
        ``kwargs["additional_args"]["complete_input_dict"]`` becomes the
        attribute ``llm.{custom_llm_provider}.{key}``.

        Response side: ``kwargs["original_response"]`` is written the same
        way, key by key, when it is a dict or a string holding a JSON object.
        Any other payload (non-JSON text, a JSON list or scalar, another
        object) is stored whole, as text, under
        ``llm.{custom_llm_provider}.stringified_raw_response``. A payload
        that is not an object is therefore recorded instead of raising from
        the logging callback.

        Non-string values are converted with ``str()`` before being set.

        Args:
            span: Span that receives the attributes.
            kwargs: Logging payload; the keys read here are
                ``litellm_params``, ``original_response`` and
                ``additional_args``.
            response_obj: Not used by this method; kept so the signature
                mirrors ``set_attributes``.
        """
        import json

        litellm_params = kwargs.get("litellm_params", {}) or {}
        custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown")

        _raw_response = kwargs.get("original_response")
        _additional_args = kwargs.get("additional_args", {}) or {}
        complete_input_dict = _additional_args.get("complete_input_dict")
        #############################################
        ########## LLM Request Attributes ###########
        #############################################

        # OTEL Attributes for the RAW Request to https://docs.anthropic.com/en/api/messages
        if complete_input_dict:
            for param, val in complete_input_dict.items():
                if not isinstance(val, str):
                    val = str(val)
                span.set_attribute(
                    f"llm.{custom_llm_provider}.{param}",
                    val,
                )

        #############################################
        ########## LLM Response Attributes ##########
        #############################################
        if _raw_response:
            parsed_response = _raw_response
            if isinstance(_raw_response, str):
                # cast str -> dict
                try:
                    parsed_response = json.loads(_raw_response)
                except json.JSONDecodeError:
                    parsed_response = None

            if isinstance(parsed_response, dict):
                for param, val in parsed_response.items():
                    if not isinstance(val, str):
                        val = str(val)
                    span.set_attribute(
                        f"llm.{custom_llm_provider}.{param}",
                        val,
                    )
            else:
                # The payload cannot be split into per-key attributes; keep
                # it as one string rather than dropping it or raising.
                verbose_logger.debug(
                    "OpenTelemetry: raw response is not a JSON object, "
                    "logging it as a single string attribute"
                )
                span.set_attribute(
                    f"llm.{custom_llm_provider}.stringified_raw_response",
                    str(_raw_response),
                )
    def _to_ns(self, dt):
        return int(dt.timestamp() * 1e9)
    def _get_span_name(self, kwargs):
-        return f"litellm-{kwargs.get('call_type', 'completion')}"
+        return LITELLM_REQUEST_SPAN_NAME
    def _get_span_context(self, kwargs):
        from opentelemetry.trace.propagation.tracecontext import (
--- a/litellm/tests/test_async_opentelemetry.py
+++ b/litellm/tests/test_async_opentelemetry.py
@ -0,0 +1,88 @@
 import asyncio
 import litellm
 from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 from litellm._logging import verbose_logger
 import logging
 import time
 import pytest
 verbose_logger.setLevel(logging.DEBUG)
@pytest.mark.skip(
    reason="new test. WIP. works locally but not on CI. Still figuring this out"
 )
@pytest.mark.asyncio
 async def test_otel_callback():
    """Run one async completion with the OpenTelemetry callback registered.

    The callback is configured with an in-memory span exporter; after the
    call the test asserts that exactly two finished spans were exported.
    Currently skipped (see the ``skip`` reason above).
    """
    exporter = InMemorySpanExporter()
    litellm.set_verbose = True
    # NOTE(review): this replaces the module-level callback list and is not
    # restored afterwards, so it may leak into other tests - confirm.
    litellm.callbacks = [OpenTelemetry(OpenTelemetryConfig(exporter=exporter))]
    await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        temperature=0.1,
        user="OTEL_USER",
    )
    # Presumably gives the logging callback time to run before the exporter
    # is inspected - TODO confirm the delay is actually required.
    await asyncio.sleep(4)
    spans = exporter.get_finished_spans()
    print("spans", spans)
    # Presumably the "litellm_request" span plus the "raw_gen_ai_request"
    # child span created on success - verify against the integration.
    assert len(spans) == 2
@pytest.mark.parametrize(
    "model",
    ["anthropic/claude-3-opus-20240229"],
 )
@pytest.mark.skip(reason="Local only test. WIP.")
 def test_completion_claude_3_function_call_with_otel(model):
    """Call ``litellm.completion`` with a forced tool call while OTEL logging is on.

    Registers an ``OpenTelemetry`` callback built from a default
    ``OpenTelemetryConfig`` and sends one request that forces the
    ``get_current_weather`` function via ``tool_choice``. The test fails only
    if the call raises; it makes no assertions about the spans or the
    response. Currently skipped (see the ``skip`` reason above).

    Args:
        model: Model identifier supplied by ``pytest.mark.parametrize``.
    """
    litellm.set_verbose = True
    litellm.callbacks = [OpenTelemetry(OpenTelemetryConfig())]
    # Single function-type tool definition sent with the request.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ]
    try:
        # test without max tokens
        response = litellm.completion(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice={
                "type": "function",
                "function": {"name": "get_current_weather"},
            },
            drop_params=True,
        )
        print("response from LiteLLM", response)
    except Exception as e:
        # Any exception from the call is reported as a test failure.
        pytest.fail(f"Error occurred: {e}")
--- a/litellm/tests/test_opentelemetry.py
+++ b/litellm/tests/test_opentelemetry.py
@ -1,35 +0,0 @@
 import asyncio
 import litellm
 from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 from litellm._logging import verbose_logger
 import logging
 import time
 import pytest
 verbose_logger.setLevel(logging.DEBUG)
@pytest.mark.skip(reason="new test")
 def test_otel_callback():
    exporter = InMemorySpanExporter()
    litellm.callbacks = [OpenTelemetry(OpenTelemetryConfig(exporter=exporter))]
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
    asyncio.run(
        litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hi"}],
        )
    )
    time.sleep(4)
    spans = exporter.get_finished_spans()
    assert len(spans) == 1 + 1