Merge pull request #3846 from nirga/revert-3831-revert-3780-traceloop-failures

fix: Log errors in Traceloop Integration (reverts previous revert)
This commit is contained in:
Krish Dholakia 2024-05-29 08:54:01 -07:00 committed by GitHub
commit c76deb8f76
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 176 additions and 139 deletions

View file

@ -1,114 +1,153 @@
import traceback
from litellm._logging import verbose_logger
import litellm
class TraceloopLogger:
    """Report LiteLLM completion calls to Traceloop as OpenTelemetry spans."""

    def __init__(self):
        """Initialise the Traceloop SDK and keep a ``TracerWrapper`` handle.

        Raises:
            ModuleNotFoundError: when the Traceloop SDK cannot be imported.
                The failure is logged with an installation hint and then
                re-raised, instead of falling through to code that would
                reference the names that failed to import.
        """
        try:
            from traceloop.sdk.tracing.tracing import TracerWrapper
            from traceloop.sdk import Traceloop
            from traceloop.sdk.instruments import Instruments
        except ModuleNotFoundError as e:
            verbose_logger.error(
                f"Traceloop not installed, try running 'pip install traceloop-sdk' to fix this error: {e}\n{traceback.format_exc()}"
            )
            raise

        Traceloop.init(
            app_name="Litellm-Server",
            disable_batch=True,
            instruments=[
                Instruments.CHROMA,
                Instruments.PINECONE,
                Instruments.WEAVIATE,
                Instruments.LLAMA_INDEX,
                Instruments.LANGCHAIN,
            ],
        )
        self.tracer_wrapper = TracerWrapper()

    @staticmethod
    def _to_epoch_nanos(value):
        """Convert a ``datetime`` to integer nanoseconds since the Unix epoch.

        Any other value (``None``, an integer timestamp, ...) is returned
        unchanged so callers that already pass nanoseconds keep working.
        """
        from datetime import datetime

        if isinstance(value, datetime):
            # Round at microsecond resolution (the resolution of ``datetime``)
            # to avoid float noise leaking into the nanosecond digits.
            return round(value.timestamp() * 1_000_000) * 1_000
        return value

    @staticmethod
    def _set_request_attributes(span, attrs, kwargs, optional_params):
        """Copy the request model, sampling parameters and prompts onto ``span``.

        ``attrs`` is the ``SpanAttributes`` class; attribute names are looked
        up lazily so that only the constants actually needed are touched.
        """
        span.set_attribute(attrs.LLM_REQUEST_MODEL, kwargs.get("model"))

        for param, attr_name in (
            ("stop", "LLM_CHAT_STOP_SEQUENCES"),
            ("frequency_penalty", "LLM_FREQUENCY_PENALTY"),
            ("presence_penalty", "LLM_PRESENCE_PENALTY"),
            ("top_p", "LLM_TOP_P"),
        ):
            if param in optional_params:
                span.set_attribute(
                    getattr(attrs, attr_name), optional_params.get(param)
                )

        if "tools" in optional_params or "functions" in optional_params:
            span.set_attribute(
                attrs.LLM_REQUEST_FUNCTIONS,
                optional_params.get("tools", optional_params.get("functions")),
            )
        if "user" in optional_params:
            span.set_attribute(attrs.LLM_USER, optional_params.get("user"))

        # Presence is tested in optional_params, so read the value from there
        # as well; the top-level kwargs entry is kept only as a fallback.
        for param, attr_name in (
            ("max_tokens", "LLM_REQUEST_MAX_TOKENS"),
            ("temperature", "LLM_REQUEST_TEMPERATURE"),
        ):
            if param in optional_params:
                value = optional_params.get(param)
                if value is None:
                    value = kwargs.get(param)
                span.set_attribute(getattr(attrs, attr_name), value)

        for idx, prompt in enumerate(kwargs.get("messages") or []):
            span.set_attribute(
                f"{attrs.LLM_PROMPTS}.{idx}.role",
                prompt.get("role"),
            )
            span.set_attribute(
                f"{attrs.LLM_PROMPTS}.{idx}.content",
                prompt.get("content"),
            )

    @staticmethod
    def _set_response_attributes(span, attrs, response_obj):
        """Copy the response model, token usage and choices onto ``span``."""
        span.set_attribute(attrs.LLM_RESPONSE_MODEL, response_obj.get("model"))

        usage = response_obj.get("usage")
        if usage:
            span.set_attribute(
                attrs.LLM_USAGE_TOTAL_TOKENS,
                usage.get("total_tokens"),
            )
            span.set_attribute(
                attrs.LLM_USAGE_COMPLETION_TOKENS,
                usage.get("completion_tokens"),
            )
            span.set_attribute(
                attrs.LLM_USAGE_PROMPT_TOKENS,
                usage.get("prompt_tokens"),
            )

        for idx, choice in enumerate(response_obj.get("choices") or []):
            span.set_attribute(
                f"{attrs.LLM_COMPLETIONS}.{idx}.finish_reason",
                choice.get("finish_reason"),
            )
            message = choice.get("message")
            if message is None:
                # Nothing more to report for a choice without a message.
                continue
            span.set_attribute(
                f"{attrs.LLM_COMPLETIONS}.{idx}.role",
                message.get("role"),
            )
            span.set_attribute(
                f"{attrs.LLM_COMPLETIONS}.{idx}.content",
                message.get("content"),
            )

    def log_event(
        self,
        kwargs,
        response_obj,
        start_time,
        end_time,
        user_id,
        print_verbose,
        level="DEFAULT",
        status_message=None,
    ):
        """Emit one ``litellm.completion`` span describing a finished call.

        Args:
            kwargs: call details; the keys read here are ``model``,
                ``messages``, ``optional_params`` and ``litellm_params``.
            response_obj: response exposing ``.get`` for ``model``, ``usage``
                and ``choices``, or ``None`` when the call failed.
            start_time: span start; ``datetime`` values are converted to
                nanoseconds, anything else is passed through unchanged.
            end_time: span end, handled like ``start_time``.
            user_id: accepted for interface compatibility; not used here.
            print_verbose: callable taking one message string.
            level: ``"ERROR"`` marks the span as failed when a string
                ``status_message`` is supplied.
            status_message: error description recorded on the span.

        Failures inside the logger are reported through ``print_verbose`` and
        never propagate to the caller.
        """
        from opentelemetry.trace import SpanKind, Status, StatusCode
        from opentelemetry.semconv.ai import SpanAttributes

        try:
            print_verbose(
                f"Traceloop Logging - Enters logging function for model {kwargs}"
            )

            tracer = self.tracer_wrapper.get_tracer()

            # LiteLLM uses the standard OpenAI library, so it's already handled by Traceloop SDK
            litellm_params = kwargs.get("litellm_params") or {}
            if litellm_params.get("custom_llm_provider") == "openai":
                return

            optional_params = kwargs.get("optional_params") or {}

            # NOTE(review): the callers visible in this change forward their
            # own start/end values; presumably datetimes - confirm. The
            # OpenTelemetry API takes integer nanoseconds since the epoch.
            span = tracer.start_span(
                "litellm.completion",
                kind=SpanKind.CLIENT,
                start_time=self._to_epoch_nanos(start_time),
            )
            try:
                if span.is_recording():
                    self._set_request_attributes(
                        span, SpanAttributes, kwargs, optional_params
                    )
                    # The failure path passes response_obj=None; skip the
                    # response section so the error status below is reached.
                    if response_obj is not None:
                        self._set_response_attributes(
                            span, SpanAttributes, response_obj
                        )

                if level == "ERROR" and isinstance(status_message, str):
                    span.record_exception(Exception(status_message))
                    span.set_status(Status(StatusCode.ERROR, status_message))
            finally:
                # Always close the span, even if an attribute lookup failed.
                span.end(self._to_epoch_nanos(end_time))

        except Exception as e:
            print_verbose(f"Traceloop Layer Error - {e}")

View file

@ -1,49 +1,35 @@
# Commented out for now - since traceloop break ci/cd import sys
# import sys import os
# import os import time
# import io, asyncio import pytest
import litellm
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from traceloop.sdk import Traceloop
# sys.path.insert(0, os.path.abspath('../..')) sys.path.insert(0, os.path.abspath("../.."))
# from litellm import completion
# import litellm
# litellm.num_retries = 3
# litellm.success_callback = [""]
# import time
# import pytest
# from traceloop.sdk import Traceloop
# Traceloop.init(app_name="test-litellm", disable_batch=True)
@pytest.fixture()
def exporter():
    """Provide an in-memory span exporter with the Traceloop callback enabled.

    Initialises Traceloop with the exporter, switches LiteLLM's success
    callback to ``"traceloop"`` and turns verbose mode on for the duration of
    the test. The previous values of both LiteLLM settings are restored
    afterwards so they do not leak into other tests.
    """
    span_exporter = InMemorySpanExporter()
    Traceloop.init(
        app_name="test_litellm",
        disable_batch=True,
        exporter=span_exporter,
    )

    previous_callbacks = litellm.success_callback
    previous_verbose = litellm.set_verbose
    litellm.success_callback = ["traceloop"]
    litellm.set_verbose = True

    yield span_exporter

    # Teardown: undo the module-level changes made above.
    litellm.success_callback = previous_callbacks
    litellm.set_verbose = previous_verbose
@pytest.mark.parametrize("model", ["claude-instant-1.2", "gpt-3.5-turbo"])
def test_traceloop_logging(exporter, model):
    """Run one completion per model with the Traceloop callback active.

    The ``exporter`` fixture is requested for its setup; this test only
    checks that the call completes without raising.
    """
    messages = [{"role": "user", "content": "This is a test"}]
    request = {
        "model": model,
        "messages": messages,
        "max_tokens": 1000,
        "temperature": 0.7,
        "timeout": 5,
    }
    litellm.completion(**request)

View file

@ -2027,6 +2027,7 @@ class Logging:
response_obj=result, response_obj=result,
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
user_id=kwargs.get("user", None),
print_verbose=print_verbose, print_verbose=print_verbose,
) )
if callback == "s3": if callback == "s3":
@ -2598,6 +2599,17 @@ class Logging:
level="ERROR", level="ERROR",
kwargs=self.model_call_details, kwargs=self.model_call_details,
) )
if callback == "traceloop":
traceloopLogger.log_event(
start_time=start_time,
end_time=end_time,
response_obj=None,
user_id=kwargs.get("user", None),
print_verbose=print_verbose,
status_message=str(exception),
level="ERROR",
kwargs=self.model_call_details,
)
if callback == "prometheus": if callback == "prometheus":
global prometheusLogger global prometheusLogger
verbose_logger.debug("reaches prometheus for success logging!") verbose_logger.debug("reaches prometheus for success logging!")