Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)
* feat: Added Missing Attributes For Arize & Phoenix Integration
* chore: Added noqa for PLR0915 to suppress warning
* chore: Moved Contributor Test to Correct Location
* chore: Removed Redundant Fallback

Co-authored-by: Ali Saleh <saleh.a@turing.com>
287 lines
9.9 KiB
Python
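"""
Span attribute helpers for litellm's Arize and Phoenix integrations.

These utilities map litellm request/response data onto OpenInference
semantic-convention attributes on an OpenTelemetry span.
"""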
import json
from typing import TYPE_CHECKING, Any, Optional, Union

from litellm._logging import verbose_logger
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
from litellm.types.utils import StandardLoggingPayload

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span

    Span = Union[_Span, Any]
else:
    Span = Any


def cast_as_primitive_value_type(value) -> Union[str, bool, int, float]:
    """
    Converts a value to an OTEL-supported primitive for Arize/Phoenix observability.
    """
    if value is None:
        return ""
    if isinstance(value, (str, bool, int, float)):
        return value
    try:
        return str(value)
    except Exception:
        return ""


def safe_set_attribute(span: Span, key: str, value: Any):
    """
    Sets a span attribute safely with OTEL-compliant primitive typing for Arize/Phoenix.
    """
    primitive_value = cast_as_primitive_value_type(value)
    span.set_attribute(key, primitive_value)


def set_attributes(span: Span, kwargs, response_obj):  # noqa: PLR0915
    """
    Populates span with OpenInference-compliant LLM attributes for Arize and Phoenix tracing.
    """
    from litellm.integrations._types.open_inference import (
        MessageAttributes,
        OpenInferenceSpanKindValues,
        SpanAttributes,
        ToolCallAttributes,
    )

    try:
        optional_params = kwargs.get("optional_params", {})
        litellm_params = kwargs.get("litellm_params", {})
        standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
            "standard_logging_object"
        )
        if standard_logging_payload is None:
            raise ValueError("standard_logging_object not found in kwargs")

        #############################################
        ############ LLM CALL METADATA ##############
        #############################################

        # Set custom metadata for observability and trace enrichment.
        metadata = (
            standard_logging_payload.get("metadata")
            if standard_logging_payload
            else None
        )
        if metadata is not None:
            safe_set_attribute(span, SpanAttributes.METADATA, safe_dumps(metadata))

        #############################################
        ########## LLM Request Attributes ###########
        #############################################

        # The name of the LLM a request is being made to.
        if kwargs.get("model"):
            safe_set_attribute(
                span,
                SpanAttributes.LLM_MODEL_NAME,
                kwargs.get("model"),
            )

        # The LLM request type.
        safe_set_attribute(
            span,
            "llm.request.type",
            standard_logging_payload["call_type"],
        )

        # The Generative AI Provider: Azure, OpenAI, etc.
        safe_set_attribute(
            span,
            SpanAttributes.LLM_PROVIDER,
            litellm_params.get("custom_llm_provider", "Unknown"),
        )

        # The maximum number of tokens the LLM generates for a request.
        if optional_params.get("max_tokens"):
            safe_set_attribute(
                span,
                "llm.request.max_tokens",
                optional_params.get("max_tokens"),
            )

        # The temperature setting for the LLM request.
        if optional_params.get("temperature"):
            safe_set_attribute(
                span,
                "llm.request.temperature",
                optional_params.get("temperature"),
            )

        # The top_p sampling setting for the LLM request.
        if optional_params.get("top_p"):
            safe_set_attribute(
                span,
                "llm.request.top_p",
                optional_params.get("top_p"),
            )

        # Indicates whether response is streamed.
        safe_set_attribute(
            span,
            "llm.is_streaming",
            str(optional_params.get("stream", False)),
        )

        # Logs the user ID if present.
        if optional_params.get("user"):
            safe_set_attribute(
                span,
                "llm.user",
                optional_params.get("user"),
            )

        # The unique identifier for the completion.
        if response_obj and response_obj.get("id"):
            safe_set_attribute(span, "llm.response.id", response_obj.get("id"))

        # The model used to generate the response.
        if response_obj and response_obj.get("model"):
            safe_set_attribute(
                span,
                "llm.response.model",
                response_obj.get("model"),
            )

        # Required by OpenInference to mark span as LLM kind.
        safe_set_attribute(
            span,
            SpanAttributes.OPENINFERENCE_SPAN_KIND,
            OpenInferenceSpanKindValues.LLM.value,
        )
        messages = kwargs.get("messages")

        # for /chat/completions
        # https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
        if messages:
            last_message = messages[-1]
            safe_set_attribute(
                span,
                SpanAttributes.INPUT_VALUE,
                last_message.get("content", ""),
            )

            # LLM_INPUT_MESSAGES shows up under `input_messages` tab on the span page.
            for idx, msg in enumerate(messages):
                prefix = f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}"
                # Set the role per message.
                safe_set_attribute(
                    span, f"{prefix}.{MessageAttributes.MESSAGE_ROLE}", msg.get("role")
                )
                # Set the content per message.
                safe_set_attribute(
                    span,
                    f"{prefix}.{MessageAttributes.MESSAGE_CONTENT}",
                    msg.get("content", ""),
                )

        # Capture tools (function definitions) used in the LLM call.
        tools = optional_params.get("tools")
        if tools:
            for idx, tool in enumerate(tools):
                function = tool.get("function")
                if not function:
                    continue
                prefix = f"{SpanAttributes.LLM_TOOLS}.{idx}"
                safe_set_attribute(
                    span, f"{prefix}.{SpanAttributes.TOOL_NAME}", function.get("name")
                )
                safe_set_attribute(
                    span,
                    f"{prefix}.{SpanAttributes.TOOL_DESCRIPTION}",
                    function.get("description"),
                )
                safe_set_attribute(
                    span,
                    f"{prefix}.{SpanAttributes.TOOL_PARAMETERS}",
                    json.dumps(function.get("parameters")),
                )

        # Capture function definitions passed via the legacy `functions` parameter.
        functions = optional_params.get("functions")
        if functions:
            for idx, function in enumerate(functions):
                prefix = f"{MessageAttributes.MESSAGE_TOOL_CALLS}.{idx}"
                safe_set_attribute(
                    span,
                    f"{prefix}.{ToolCallAttributes.TOOL_CALL_FUNCTION_NAME}",
                    function.get("name"),
                )

        # Capture invocation parameters and user ID if available.
        model_params = (
            standard_logging_payload.get("model_parameters")
            if standard_logging_payload
            else None
        )
        if model_params:
            # The model parameters used for the request, serialized as JSON.
            safe_set_attribute(
                span,
                SpanAttributes.LLM_INVOCATION_PARAMETERS,
                safe_dumps(model_params),
            )

            if model_params.get("user"):
                user_id = model_params.get("user")
                if user_id is not None:
                    safe_set_attribute(span, SpanAttributes.USER_ID, user_id)

        #############################################
        ########## LLM Response Attributes ##########
        #############################################

        # Captures response tokens, message, and content.
        if hasattr(response_obj, "get"):
            for idx, choice in enumerate(response_obj.get("choices", [])):
                response_message = choice.get("message", {})
                safe_set_attribute(
                    span,
                    SpanAttributes.OUTPUT_VALUE,
                    response_message.get("content", ""),
                )

                # This shows up under `output_messages` tab on the span page.
                prefix = f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}"
                safe_set_attribute(
                    span,
                    f"{prefix}.{MessageAttributes.MESSAGE_ROLE}",
                    response_message.get("role"),
                )
                safe_set_attribute(
                    span,
                    f"{prefix}.{MessageAttributes.MESSAGE_CONTENT}",
                    response_message.get("content", ""),
                )

        # Token usage info.
        usage = response_obj and response_obj.get("usage")
        if usage:
            safe_set_attribute(
                span,
                SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
                usage.get("total_tokens"),
            )

            # The number of tokens used in the LLM response (completion).
            safe_set_attribute(
                span,
                SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
                usage.get("completion_tokens"),
            )

            # The number of tokens used in the LLM prompt.
            safe_set_attribute(
                span,
                SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
                usage.get("prompt_tokens"),
            )

    except Exception as e:
        verbose_logger.error(
            f"[Arize/Phoenix] Failed to set OpenInference span attributes: {e}"
        )
        if hasattr(span, "record_exception"):
            span.record_exception(e)
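

# Illustrative usage sketch: one way set_attributes() might be exercised against
# a real OpenTelemetry span. The kwargs/response shapes below are assumptions
# chosen for demonstration; the actual payloads are produced by litellm's
# logging pipeline at runtime.
if __name__ == "__main__":
    from opentelemetry import trace

    # Without a configured TracerProvider this yields a no-op span, which is
    # still enough to exercise the attribute-setting code path.
    tracer = trace.get_tracer(__name__)

    example_kwargs = {
        "model": "gpt-4o",
        "optional_params": {"temperature": 0.2, "stream": False},
        "litellm_params": {"custom_llm_provider": "openai"},
        "messages": [{"role": "user", "content": "Hello!"}],
        "standard_logging_object": {
            "call_type": "completion",
            "metadata": {"environment": "dev"},
            "model_parameters": {"temperature": 0.2, "user": "user-123"},
        },
    }
    example_response = {
        "id": "chatcmpl-123",
        "model": "gpt-4o",
        "choices": [{"message": {"role": "assistant", "content": "Hi there!"}}],
        "usage": {"prompt_tokens": 8, "completion_tokens": 3, "total_tokens": 11},
    }

    with tracer.start_as_current_span("litellm_request") as example_span:
        set_attributes(example_span, example_kwargs, example_response)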