mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
Merge pull request #4078 from BerriAI/litellm_low_raw_request_response_otel
[FEAT]- OTEL Log raw LLM request/response on OTEL
This commit is contained in:
commit
8c5802d506
3 changed files with 257 additions and 80 deletions
|
@ -1,6 +1,7 @@
|
||||||
import os
|
import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import litellm
|
||||||
|
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
from litellm._logging import verbose_logger
|
from litellm._logging import verbose_logger
|
||||||
|
@ -22,6 +23,8 @@ LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
|
||||||
LITELLM_RESOURCE = {
|
LITELLM_RESOURCE = {
|
||||||
"service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
|
"service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
|
||||||
}
|
}
|
||||||
|
RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
|
||||||
|
LITELLM_REQUEST_SPAN_NAME = "litellm_request"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
@ -194,6 +197,7 @@ class OpenTelemetry(CustomLogger):
|
||||||
|
|
||||||
def _handle_sucess(self, kwargs, response_obj, start_time, end_time):
|
def _handle_sucess(self, kwargs, response_obj, start_time, end_time):
|
||||||
from opentelemetry.trace import Status, StatusCode
|
from opentelemetry.trace import Status, StatusCode
|
||||||
|
from opentelemetry import trace
|
||||||
|
|
||||||
verbose_logger.debug(
|
verbose_logger.debug(
|
||||||
"OpenTelemetry Logger: Logging kwargs: %s, OTEL config settings=%s",
|
"OpenTelemetry Logger: Logging kwargs: %s, OTEL config settings=%s",
|
||||||
|
@ -202,6 +206,7 @@ class OpenTelemetry(CustomLogger):
|
||||||
)
|
)
|
||||||
_parent_context, parent_otel_span = self._get_span_context(kwargs)
|
_parent_context, parent_otel_span = self._get_span_context(kwargs)
|
||||||
|
|
||||||
|
# Span 1: Request sent to litellm SDK
|
||||||
span = self.tracer.start_span(
|
span = self.tracer.start_span(
|
||||||
name=self._get_span_name(kwargs),
|
name=self._get_span_name(kwargs),
|
||||||
start_time=self._to_ns(start_time),
|
start_time=self._to_ns(start_time),
|
||||||
|
@ -209,7 +214,23 @@ class OpenTelemetry(CustomLogger):
|
||||||
)
|
)
|
||||||
span.set_status(Status(StatusCode.OK))
|
span.set_status(Status(StatusCode.OK))
|
||||||
self.set_attributes(span, kwargs, response_obj)
|
self.set_attributes(span, kwargs, response_obj)
|
||||||
|
|
||||||
|
if litellm.turn_off_message_logging is True:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
# Span 2: Raw Request / Response to LLM
|
||||||
|
raw_request_span = self.tracer.start_span(
|
||||||
|
name=RAW_REQUEST_SPAN_NAME,
|
||||||
|
start_time=self._to_ns(start_time),
|
||||||
|
context=trace.set_span_in_context(span),
|
||||||
|
)
|
||||||
|
|
||||||
|
raw_request_span.set_status(Status(StatusCode.OK))
|
||||||
|
self.set_raw_request_attributes(raw_request_span, kwargs, response_obj)
|
||||||
|
raw_request_span.end(end_time=self._to_ns(end_time))
|
||||||
|
|
||||||
span.end(end_time=self._to_ns(end_time))
|
span.end(end_time=self._to_ns(end_time))
|
||||||
|
|
||||||
if parent_otel_span is not None:
|
if parent_otel_span is not None:
|
||||||
parent_otel_span.end(end_time=self._to_ns(datetime.now()))
|
parent_otel_span.end(end_time=self._to_ns(datetime.now()))
|
||||||
|
|
||||||
|
@ -225,6 +246,31 @@ class OpenTelemetry(CustomLogger):
|
||||||
self.set_attributes(span, kwargs, response_obj)
|
self.set_attributes(span, kwargs, response_obj)
|
||||||
span.end(end_time=self._to_ns(end_time))
|
span.end(end_time=self._to_ns(end_time))
|
||||||
|
|
||||||
|
def set_tools_attributes(self, span: Span, tools):
|
||||||
|
from opentelemetry.semconv.ai import SpanAttributes
|
||||||
|
import json
|
||||||
|
|
||||||
|
if not tools:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
for i, tool in enumerate(tools):
|
||||||
|
function = tool.get("function")
|
||||||
|
if not function:
|
||||||
|
continue
|
||||||
|
|
||||||
|
prefix = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}"
|
||||||
|
span.set_attribute(f"{prefix}.name", function.get("name"))
|
||||||
|
span.set_attribute(f"{prefix}.description", function.get("description"))
|
||||||
|
span.set_attribute(
|
||||||
|
f"{prefix}.parameters", json.dumps(function.get("parameters"))
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
verbose_logger.error(
|
||||||
|
"OpenTelemetry: Error setting tools attributes: %s", str(e)
|
||||||
|
)
|
||||||
|
pass
|
||||||
|
|
||||||
def set_attributes(self, span: Span, kwargs, response_obj):
|
def set_attributes(self, span: Span, kwargs, response_obj):
|
||||||
from opentelemetry.semconv.ai import SpanAttributes
|
from opentelemetry.semconv.ai import SpanAttributes
|
||||||
|
|
||||||
|
@ -239,7 +285,8 @@ class OpenTelemetry(CustomLogger):
|
||||||
#############################################
|
#############################################
|
||||||
|
|
||||||
# The name of the LLM a request is being made to
|
# The name of the LLM a request is being made to
|
||||||
span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model"))
|
if kwargs.get("model"):
|
||||||
|
span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model"))
|
||||||
|
|
||||||
# The Generative AI Provider: Azure, OpenAI, etc.
|
# The Generative AI Provider: Azure, OpenAI, etc.
|
||||||
span.set_attribute(
|
span.set_attribute(
|
||||||
|
@ -248,64 +295,99 @@ class OpenTelemetry(CustomLogger):
|
||||||
)
|
)
|
||||||
|
|
||||||
# The maximum number of tokens the LLM generates for a request.
|
# The maximum number of tokens the LLM generates for a request.
|
||||||
span.set_attribute(
|
if optional_params.get("max_tokens"):
|
||||||
SpanAttributes.LLM_REQUEST_MAX_TOKENS, optional_params.get("max_tokens")
|
span.set_attribute(
|
||||||
)
|
SpanAttributes.LLM_REQUEST_MAX_TOKENS, optional_params.get("max_tokens")
|
||||||
|
)
|
||||||
|
|
||||||
# The temperature setting for the LLM request.
|
# The temperature setting for the LLM request.
|
||||||
span.set_attribute(
|
if optional_params.get("temperature"):
|
||||||
SpanAttributes.LLM_REQUEST_TEMPERATURE, optional_params.get("temperature")
|
span.set_attribute(
|
||||||
)
|
SpanAttributes.LLM_REQUEST_TEMPERATURE,
|
||||||
|
optional_params.get("temperature"),
|
||||||
|
)
|
||||||
|
|
||||||
# The top_p sampling setting for the LLM request.
|
# The top_p sampling setting for the LLM request.
|
||||||
span.set_attribute(
|
if optional_params.get("top_p"):
|
||||||
SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
|
|
||||||
)
|
|
||||||
|
|
||||||
span.set_attribute(
|
|
||||||
SpanAttributes.LLM_IS_STREAMING, optional_params.get("stream")
|
|
||||||
)
|
|
||||||
|
|
||||||
span.set_attribute(
|
|
||||||
SpanAttributes.LLM_REQUEST_FUNCTIONS,
|
|
||||||
optional_params.get("tools"),
|
|
||||||
)
|
|
||||||
|
|
||||||
span.set_attribute(SpanAttributes.LLM_USER, optional_params.get("user"))
|
|
||||||
|
|
||||||
for idx, prompt in enumerate(kwargs.get("messages")):
|
|
||||||
span.set_attribute(
|
span.set_attribute(
|
||||||
f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
|
SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
|
||||||
prompt.get("role"),
|
|
||||||
)
|
|
||||||
span.set_attribute(
|
|
||||||
f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
|
|
||||||
prompt.get("content"),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
span.set_attribute(
|
||||||
|
SpanAttributes.LLM_IS_STREAMING, optional_params.get("stream", False)
|
||||||
|
)
|
||||||
|
|
||||||
|
if optional_params.get("tools"):
|
||||||
|
tools = optional_params["tools"]
|
||||||
|
self.set_tools_attributes(span, tools)
|
||||||
|
|
||||||
|
if optional_params.get("user"):
|
||||||
|
span.set_attribute(SpanAttributes.LLM_USER, optional_params.get("user"))
|
||||||
|
|
||||||
|
if kwargs.get("messages"):
|
||||||
|
for idx, prompt in enumerate(kwargs.get("messages")):
|
||||||
|
if prompt.get("role"):
|
||||||
|
span.set_attribute(
|
||||||
|
f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
|
||||||
|
prompt.get("role"),
|
||||||
|
)
|
||||||
|
|
||||||
|
if prompt.get("content"):
|
||||||
|
if not isinstance(prompt.get("content"), str):
|
||||||
|
prompt["content"] = str(prompt.get("content"))
|
||||||
|
span.set_attribute(
|
||||||
|
f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
|
||||||
|
prompt.get("content"),
|
||||||
|
)
|
||||||
#############################################
|
#############################################
|
||||||
########## LLM Response Attributes ##########
|
########## LLM Response Attributes ##########
|
||||||
#############################################
|
#############################################
|
||||||
|
if response_obj.get("choices"):
|
||||||
|
for idx, choice in enumerate(response_obj.get("choices")):
|
||||||
|
if choice.get("finish_reason"):
|
||||||
|
span.set_attribute(
|
||||||
|
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
|
||||||
|
choice.get("finish_reason"),
|
||||||
|
)
|
||||||
|
if choice.get("message"):
|
||||||
|
if choice.get("message").get("role"):
|
||||||
|
span.set_attribute(
|
||||||
|
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
|
||||||
|
choice.get("message").get("role"),
|
||||||
|
)
|
||||||
|
if choice.get("message").get("content"):
|
||||||
|
if not isinstance(choice.get("message").get("content"), str):
|
||||||
|
choice["message"]["content"] = str(
|
||||||
|
choice.get("message").get("content")
|
||||||
|
)
|
||||||
|
span.set_attribute(
|
||||||
|
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
|
||||||
|
choice.get("message").get("content"),
|
||||||
|
)
|
||||||
|
|
||||||
for idx, choice in enumerate(response_obj.get("choices")):
|
message = choice.get("message")
|
||||||
span.set_attribute(
|
if not isinstance(message, dict):
|
||||||
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
|
message = message.dict()
|
||||||
choice.get("finish_reason"),
|
tool_calls = message.get("tool_calls")
|
||||||
)
|
if tool_calls:
|
||||||
span.set_attribute(
|
span.set_attribute(
|
||||||
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
|
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.function_call.name",
|
||||||
choice.get("message").get("role"),
|
tool_calls[0].get("function").get("name"),
|
||||||
)
|
)
|
||||||
span.set_attribute(
|
span.set_attribute(
|
||||||
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
|
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.function_call.arguments",
|
||||||
choice.get("message").get("content"),
|
tool_calls[0].get("function").get("arguments"),
|
||||||
)
|
)
|
||||||
|
|
||||||
# The unique identifier for the completion.
|
# The unique identifier for the completion.
|
||||||
span.set_attribute("gen_ai.response.id", response_obj.get("id"))
|
if response_obj.get("id"):
|
||||||
|
span.set_attribute("gen_ai.response.id", response_obj.get("id"))
|
||||||
|
|
||||||
# The model used to generate the response.
|
# The model used to generate the response.
|
||||||
span.set_attribute(SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model"))
|
if response_obj.get("model"):
|
||||||
|
span.set_attribute(
|
||||||
|
SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
|
||||||
|
)
|
||||||
|
|
||||||
usage = response_obj.get("usage")
|
usage = response_obj.get("usage")
|
||||||
if usage:
|
if usage:
|
||||||
|
@ -326,11 +408,53 @@ class OpenTelemetry(CustomLogger):
|
||||||
usage.get("prompt_tokens"),
|
usage.get("prompt_tokens"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
|
||||||
|
from opentelemetry.semconv.ai import SpanAttributes
|
||||||
|
|
||||||
|
optional_params = kwargs.get("optional_params", {})
|
||||||
|
litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||||
|
custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown")
|
||||||
|
|
||||||
|
_raw_response = kwargs.get("original_response")
|
||||||
|
_additional_args = kwargs.get("additional_args", {}) or {}
|
||||||
|
complete_input_dict = _additional_args.get("complete_input_dict")
|
||||||
|
#############################################
|
||||||
|
########## LLM Request Attributes ###########
|
||||||
|
#############################################
|
||||||
|
|
||||||
|
# OTEL attributes for the raw request sent to the LLM provider (e.g. https://docs.anthropic.com/en/api/messages)
|
||||||
|
if complete_input_dict:
|
||||||
|
for param, val in complete_input_dict.items():
|
||||||
|
if not isinstance(val, str):
|
||||||
|
val = str(val)
|
||||||
|
span.set_attribute(
|
||||||
|
f"llm.{custom_llm_provider}.{param}",
|
||||||
|
val,
|
||||||
|
)
|
||||||
|
|
||||||
|
#############################################
|
||||||
|
########## LLM Response Attributes ##########
|
||||||
|
#############################################
|
||||||
|
if _raw_response:
|
||||||
|
# cast str -> dict
|
||||||
|
import json
|
||||||
|
|
||||||
|
_raw_response = json.loads(_raw_response)
|
||||||
|
for param, val in _raw_response.items():
|
||||||
|
if not isinstance(val, str):
|
||||||
|
val = str(val)
|
||||||
|
span.set_attribute(
|
||||||
|
f"llm.{custom_llm_provider}.{param}",
|
||||||
|
val,
|
||||||
|
)
|
||||||
|
|
||||||
|
pass
|
||||||
|
|
||||||
def _to_ns(self, dt):
|
def _to_ns(self, dt):
|
||||||
return int(dt.timestamp() * 1e9)
|
return int(dt.timestamp() * 1e9)
|
||||||
|
|
||||||
def _get_span_name(self, kwargs):
|
def _get_span_name(self, kwargs):
|
||||||
return f"litellm-{kwargs.get('call_type', 'completion')}"
|
return LITELLM_REQUEST_SPAN_NAME
|
||||||
|
|
||||||
def _get_span_context(self, kwargs):
|
def _get_span_context(self, kwargs):
|
||||||
from opentelemetry.trace.propagation.tracecontext import (
|
from opentelemetry.trace.propagation.tracecontext import (
|
||||||
|
|
88
litellm/tests/test_async_opentelemetry.py
Normal file
88
litellm/tests/test_async_opentelemetry.py
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
import asyncio
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
|
||||||
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
verbose_logger.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(
|
||||||
|
reason="new test. WIP. works locally but not on CI. Still figuring this out"
|
||||||
|
)
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_otel_callback():
|
||||||
|
exporter = InMemorySpanExporter()
|
||||||
|
litellm.set_verbose = True
|
||||||
|
litellm.callbacks = [OpenTelemetry(OpenTelemetryConfig(exporter=exporter))]
|
||||||
|
|
||||||
|
await litellm.acompletion(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages=[{"role": "user", "content": "hi"}],
|
||||||
|
temperature=0.1,
|
||||||
|
user="OTEL_USER",
|
||||||
|
)
|
||||||
|
|
||||||
|
await asyncio.sleep(4)
|
||||||
|
|
||||||
|
spans = exporter.get_finished_spans()
|
||||||
|
print("spans", spans)
|
||||||
|
assert len(spans) == 2
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"model",
|
||||||
|
["anthropic/claude-3-opus-20240229"],
|
||||||
|
)
|
||||||
|
@pytest.mark.skip(reason="Local only test. WIP.")
|
||||||
|
def test_completion_claude_3_function_call_with_otel(model):
|
||||||
|
litellm.set_verbose = True
|
||||||
|
|
||||||
|
litellm.callbacks = [OpenTelemetry(OpenTelemetryConfig())]
|
||||||
|
tools = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "get_current_weather",
|
||||||
|
"description": "Get the current weather in a given location",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"location": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
|
},
|
||||||
|
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
|
||||||
|
},
|
||||||
|
"required": ["location"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "What's the weather like in Boston today in Fahrenheit?",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
try:
|
||||||
|
# test without max tokens
|
||||||
|
response = litellm.completion(
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
tools=tools,
|
||||||
|
tool_choice={
|
||||||
|
"type": "function",
|
||||||
|
"function": {"name": "get_current_weather"},
|
||||||
|
},
|
||||||
|
drop_params=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
print("response from LiteLLM", response)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"Error occurred: {e}")
|
|
@ -1,35 +0,0 @@
|
||||||
import asyncio
|
|
||||||
import litellm
|
|
||||||
|
|
||||||
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
|
|
||||||
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
|
||||||
from litellm._logging import verbose_logger
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
verbose_logger.setLevel(logging.DEBUG)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="new test")
|
|
||||||
def test_otel_callback():
|
|
||||||
exporter = InMemorySpanExporter()
|
|
||||||
|
|
||||||
litellm.callbacks = [OpenTelemetry(OpenTelemetryConfig(exporter=exporter))]
|
|
||||||
|
|
||||||
litellm.completion(
|
|
||||||
model="gpt-3.5-turbo",
|
|
||||||
messages=[{"role": "user", "content": "hi"}],
|
|
||||||
)
|
|
||||||
|
|
||||||
asyncio.run(
|
|
||||||
litellm.acompletion(
|
|
||||||
model="gpt-3.5-turbo",
|
|
||||||
messages=[{"role": "user", "content": "hi"}],
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
time.sleep(4)
|
|
||||||
|
|
||||||
spans = exporter.get_finished_spans()
|
|
||||||
assert len(spans) == 1 + 1
|
|
Loading…
Add table
Add a link
Reference in a new issue