mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
Revert "fix: Log errors in Traceloop Integration (reverts previous revert)"
This commit is contained in:
parent
1d18ca6a7d
commit
77cc9cded9
3 changed files with 139 additions and 176 deletions
|
@ -1,153 +1,114 @@
|
|||
import traceback
|
||||
from litellm._logging import verbose_logger
|
||||
import litellm
|
||||
|
||||
|
||||
class TraceloopLogger:
|
||||
def __init__(self):
|
||||
try:
|
||||
from traceloop.sdk.tracing.tracing import TracerWrapper
|
||||
from traceloop.sdk import Traceloop
|
||||
from traceloop.sdk.instruments import Instruments
|
||||
except ModuleNotFoundError as e:
|
||||
verbose_logger.error(
|
||||
f"Traceloop not installed, try running 'pip install traceloop-sdk' to fix this error: {e}\n{traceback.format_exc()}"
|
||||
)
|
||||
from traceloop.sdk.tracing.tracing import TracerWrapper
|
||||
from traceloop.sdk import Traceloop
|
||||
|
||||
Traceloop.init(
|
||||
app_name="Litellm-Server",
|
||||
disable_batch=True,
|
||||
instruments=[
|
||||
Instruments.CHROMA,
|
||||
Instruments.PINECONE,
|
||||
Instruments.WEAVIATE,
|
||||
Instruments.LLAMA_INDEX,
|
||||
Instruments.LANGCHAIN,
|
||||
],
|
||||
)
|
||||
Traceloop.init(app_name="Litellm-Server", disable_batch=True)
|
||||
self.tracer_wrapper = TracerWrapper()
|
||||
|
||||
def log_event(
|
||||
self,
|
||||
kwargs,
|
||||
response_obj,
|
||||
start_time,
|
||||
end_time,
|
||||
user_id,
|
||||
print_verbose,
|
||||
level="DEFAULT",
|
||||
status_message=None,
|
||||
):
|
||||
from opentelemetry import trace
|
||||
from opentelemetry.trace import SpanKind, Status, StatusCode
|
||||
def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
|
||||
from opentelemetry.trace import SpanKind
|
||||
from opentelemetry.semconv.ai import SpanAttributes
|
||||
|
||||
try:
|
||||
print_verbose(
|
||||
f"Traceloop Logging - Enters logging function for model {kwargs}"
|
||||
)
|
||||
|
||||
tracer = self.tracer_wrapper.get_tracer()
|
||||
|
||||
model = kwargs.get("model")
|
||||
|
||||
# LiteLLM uses the standard OpenAI library, so it's already handled by Traceloop SDK
|
||||
if kwargs.get("litellm_params").get("custom_llm_provider") == "openai":
|
||||
return
|
||||
|
||||
optional_params = kwargs.get("optional_params", {})
|
||||
span = tracer.start_span(
|
||||
"litellm.completion", kind=SpanKind.CLIENT, start_time=start_time
|
||||
)
|
||||
with tracer.start_as_current_span(
|
||||
"litellm.completion",
|
||||
kind=SpanKind.CLIENT,
|
||||
) as span:
|
||||
if span.is_recording():
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
|
||||
)
|
||||
if "stop" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
|
||||
optional_params.get("stop"),
|
||||
)
|
||||
if "frequency_penalty" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_FREQUENCY_PENALTY,
|
||||
optional_params.get("frequency_penalty"),
|
||||
)
|
||||
if "presence_penalty" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_PRESENCE_PENALTY,
|
||||
optional_params.get("presence_penalty"),
|
||||
)
|
||||
if "top_p" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
|
||||
)
|
||||
if "tools" in optional_params or "functions" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_REQUEST_FUNCTIONS,
|
||||
optional_params.get(
|
||||
"tools", optional_params.get("functions")
|
||||
),
|
||||
)
|
||||
if "user" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_USER, optional_params.get("user")
|
||||
)
|
||||
if "max_tokens" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_REQUEST_MAX_TOKENS,
|
||||
kwargs.get("max_tokens"),
|
||||
)
|
||||
if "temperature" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TEMPERATURE, kwargs.get("temperature")
|
||||
)
|
||||
|
||||
if span.is_recording():
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
|
||||
)
|
||||
if "stop" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
|
||||
optional_params.get("stop"),
|
||||
)
|
||||
if "frequency_penalty" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_FREQUENCY_PENALTY,
|
||||
optional_params.get("frequency_penalty"),
|
||||
)
|
||||
if "presence_penalty" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_PRESENCE_PENALTY,
|
||||
optional_params.get("presence_penalty"),
|
||||
)
|
||||
if "top_p" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
|
||||
)
|
||||
if "tools" in optional_params or "functions" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_REQUEST_FUNCTIONS,
|
||||
optional_params.get("tools", optional_params.get("functions")),
|
||||
)
|
||||
if "user" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_USER, optional_params.get("user")
|
||||
)
|
||||
if "max_tokens" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_REQUEST_MAX_TOKENS,
|
||||
kwargs.get("max_tokens"),
|
||||
)
|
||||
if "temperature" in optional_params:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_REQUEST_TEMPERATURE,
|
||||
kwargs.get("temperature"),
|
||||
)
|
||||
for idx, prompt in enumerate(kwargs.get("messages")):
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
|
||||
prompt.get("role"),
|
||||
)
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
|
||||
prompt.get("content"),
|
||||
)
|
||||
|
||||
for idx, prompt in enumerate(kwargs.get("messages")):
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
|
||||
prompt.get("role"),
|
||||
)
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
|
||||
prompt.get("content"),
|
||||
SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
|
||||
)
|
||||
usage = response_obj.get("usage")
|
||||
if usage:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
|
||||
usage.get("total_tokens"),
|
||||
)
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
|
||||
usage.get("completion_tokens"),
|
||||
)
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
|
||||
usage.get("prompt_tokens"),
|
||||
)
|
||||
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
|
||||
)
|
||||
usage = response_obj.get("usage")
|
||||
if usage:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
|
||||
usage.get("total_tokens"),
|
||||
)
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
|
||||
usage.get("completion_tokens"),
|
||||
)
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
|
||||
usage.get("prompt_tokens"),
|
||||
)
|
||||
|
||||
for idx, choice in enumerate(response_obj.get("choices")):
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
|
||||
choice.get("finish_reason"),
|
||||
)
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
|
||||
choice.get("message").get("role"),
|
||||
)
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
|
||||
choice.get("message").get("content"),
|
||||
)
|
||||
|
||||
if (
|
||||
level == "ERROR"
|
||||
and status_message is not None
|
||||
and isinstance(status_message, str)
|
||||
):
|
||||
span.record_exception(Exception(status_message))
|
||||
span.set_status(Status(StatusCode.ERROR, status_message))
|
||||
|
||||
span.end(end_time)
|
||||
for idx, choice in enumerate(response_obj.get("choices")):
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
|
||||
choice.get("finish_reason"),
|
||||
)
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
|
||||
choice.get("message").get("role"),
|
||||
)
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
|
||||
choice.get("message").get("content"),
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print_verbose(f"Traceloop Layer Error - {e}")
|
||||
|
|
|
@ -1,35 +1,49 @@
|
|||
import sys
|
||||
import os
|
||||
import time
|
||||
import pytest
|
||||
import litellm
|
||||
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
||||
from traceloop.sdk import Traceloop
|
||||
# Commented out for now - since traceloop break ci/cd
|
||||
# import sys
|
||||
# import os
|
||||
# import io, asyncio
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
# sys.path.insert(0, os.path.abspath('../..'))
|
||||
|
||||
# from litellm import completion
|
||||
# import litellm
|
||||
# litellm.num_retries = 3
|
||||
# litellm.success_callback = [""]
|
||||
# import time
|
||||
# import pytest
|
||||
# from traceloop.sdk import Traceloop
|
||||
# Traceloop.init(app_name="test-litellm", disable_batch=True)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def exporter():
|
||||
exporter = InMemorySpanExporter()
|
||||
Traceloop.init(
|
||||
app_name="test_litellm",
|
||||
disable_batch=True,
|
||||
exporter=exporter,
|
||||
)
|
||||
litellm.success_callback = ["traceloop"]
|
||||
litellm.set_verbose = True
|
||||
|
||||
return exporter
|
||||
# def test_traceloop_logging():
|
||||
# try:
|
||||
# litellm.set_verbose = True
|
||||
# response = litellm.completion(
|
||||
# model="gpt-3.5-turbo",
|
||||
# messages=[{"role": "user", "content":"This is a test"}],
|
||||
# max_tokens=1000,
|
||||
# temperature=0.7,
|
||||
# timeout=5,
|
||||
# )
|
||||
# print(f"response: {response}")
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"An exception occurred - {e}")
|
||||
# # test_traceloop_logging()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", ["claude-instant-1.2", "gpt-3.5-turbo"])
|
||||
def test_traceloop_logging(exporter, model):
|
||||
|
||||
litellm.completion(
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "This is a test"}],
|
||||
max_tokens=1000,
|
||||
temperature=0.7,
|
||||
timeout=5,
|
||||
)
|
||||
# # def test_traceloop_logging_async():
|
||||
# # try:
|
||||
# # litellm.set_verbose = True
|
||||
# # async def test_acompletion():
|
||||
# # return await litellm.acompletion(
|
||||
# # model="gpt-3.5-turbo",
|
||||
# # messages=[{"role": "user", "content":"This is a test"}],
|
||||
# # max_tokens=1000,
|
||||
# # temperature=0.7,
|
||||
# # timeout=5,
|
||||
# # )
|
||||
# # response = asyncio.run(test_acompletion())
|
||||
# # print(f"response: {response}")
|
||||
# # except Exception as e:
|
||||
# # pytest.fail(f"An exception occurred - {e}")
|
||||
# # test_traceloop_logging_async()
|
||||
|
|
|
@ -2027,7 +2027,6 @@ class Logging:
|
|||
response_obj=result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
user_id=kwargs.get("user", None),
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
if callback == "s3":
|
||||
|
@ -2599,17 +2598,6 @@ class Logging:
|
|||
level="ERROR",
|
||||
kwargs=self.model_call_details,
|
||||
)
|
||||
if callback == "traceloop":
|
||||
traceloopLogger.log_event(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
response_obj=None,
|
||||
user_id=kwargs.get("user", None),
|
||||
print_verbose=print_verbose,
|
||||
status_message=str(exception),
|
||||
level="ERROR",
|
||||
kwargs=self.model_call_details,
|
||||
)
|
||||
if callback == "prometheus":
|
||||
global prometheusLogger
|
||||
verbose_logger.debug("reaches prometheus for success logging!")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue