From 17833b117d36fbee6f10eec023cfdbff4d15d381 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 8 Jun 2024 09:58:52 -0700
Subject: [PATCH] feat - OTEL log litellm request / response

---
 litellm/integrations/opentelemetry.py | 106 ++++++++++++++++++++++++--
 1 file changed, 101 insertions(+), 5 deletions(-)

diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py
index 84b2f88c6..c8b34c477 100644
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@@ -223,10 +223,106 @@ class OpenTelemetry(CustomLogger):
         self.set_attributes(span, kwargs, response_obj)
         span.end(end_time=self._to_ns(end_time))
 
-    def set_attributes(self, span, kwargs, response_obj):
-        for key in ["model", "api_base", "api_version"]:
-            if key in kwargs:
-                span.set_attribute(key, kwargs[key])
+    def set_attributes(self, span: Span, kwargs, response_obj):
+        from opentelemetry.semconv.ai import SpanAttributes
+
+        optional_params = kwargs.get("optional_params", {})
+        litellm_params = kwargs.get("litellm_params", {}) or {}
+
+        # https://github.com/open-telemetry/semantic-conventions/blob/main/model/registry/gen-ai.yaml
+        # Following Conventions here: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md
+
+        #############################################
+        ########## LLM Request Attributes ###########
+        #############################################
+
+        # The name of the LLM a request is being made to
+        span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model"))
+
+        # The Generative AI Provider: Azure, OpenAI, etc.
+        span.set_attribute(
+            SpanAttributes.LLM_SYSTEM,
+            litellm_params.get("custom_llm_provider", "Unknown"),
+        )
+
+        # The maximum number of tokens the LLM generates for a request.
+        span.set_attribute(
+            SpanAttributes.LLM_REQUEST_MAX_TOKENS, optional_params.get("max_tokens")
+        )
+
+        # The temperature setting for the LLM request.
+        span.set_attribute(
+            SpanAttributes.LLM_REQUEST_TEMPERATURE, optional_params.get("temperature")
+        )
+
+        # The top_p sampling setting for the LLM request.
+        span.set_attribute(
+            SpanAttributes.LLM_REQUEST_TOP_P, optional_params.get("top_p")
+        )
+
+        span.set_attribute(
+            SpanAttributes.LLM_IS_STREAMING, optional_params.get("stream")
+        )
+
+        span.set_attribute(
+            SpanAttributes.LLM_REQUEST_FUNCTIONS,
+            optional_params.get("tools"),
+        )
+
+        span.set_attribute(SpanAttributes.LLM_USER, optional_params.get("user"))
+
+        for idx, prompt in enumerate(kwargs.get("messages")):
+            span.set_attribute(
+                f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
+                prompt.get("role"),
+            )
+            span.set_attribute(
+                f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
+                prompt.get("content"),
+            )
+
+        #############################################
+        ########## LLM Response Attributes ##########
+        #############################################
+
+        for idx, choice in enumerate(response_obj.get("choices")):
+            span.set_attribute(
+                f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
+                choice.get("finish_reason"),
+            )
+            span.set_attribute(
+                f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
+                choice.get("message").get("role"),
+            )
+            span.set_attribute(
+                f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
+                choice.get("message").get("content"),
+            )
+
+        # The unique identifier for the completion.
+        span.set_attribute("gen_ai.response.id", response_obj.get("id"))
+
+        # The model used to generate the response.
+        span.set_attribute(SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model"))
+
+        usage = response_obj.get("usage")
+        if usage:
+            span.set_attribute(
+                SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
+                usage.get("total_tokens"),
+            )
+
+            # The number of tokens used in the LLM response (completion).
+            span.set_attribute(
+                SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
+                usage.get("completion_tokens"),
+            )
+
+            # The number of tokens used in the LLM prompt.
+            span.set_attribute(
+                SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
+                usage.get("prompt_tokens"),
+            )
 
     def _to_ns(self, dt):
         return int(dt.timestamp() * 1e9)
@@ -244,7 +340,7 @@ class OpenTelemetry(CustomLogger):
         proxy_server_request = litellm_params.get("proxy_server_request", {}) or {}
         headers = proxy_server_request.get("headers", {}) or {}
         traceparent = headers.get("traceparent", None)
-        _metadata = litellm_params.get("metadata", {})
+        _metadata = litellm_params.get("metadata", {}) or {}
        parent_otel_span = _metadata.get("litellm_parent_otel_span", None)
 
        """
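Usage sketch (not part of the patch): a minimal way to exercise the new attribute logging, assuming the OpenTelemetry class can be constructed with its default/env-based exporter configuration and that litellm registers CustomLogger instances via litellm.callbacks; the model name and exporter setup below are illustrative only. The SpanAttributes import in the patch appears to come from the opentelemetry-semantic-conventions-ai package.

    import litellm
    from litellm.integrations.opentelemetry import OpenTelemetry

    # Register the custom logger; each completion call should then emit a span
    # whose request/response attributes (model, temperature, prompts, per-choice
    # completions, token usage) are populated by set_attributes() above.
    litellm.callbacks = [OpenTelemetry()]

    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        temperature=0.2,
        max_tokens=50,
    )

With this change the span follows the linked gen-ai semantic conventions (request model, sampling parameters, prompts, completions, and token usage) instead of only logging model / api_base / api_version.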