From 8dcea009b8c0762591f234aa547ab94540bfdb59 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 5 Aug 2024 19:58:55 -0700 Subject: [PATCH 1/7] log event_metadata on otel service loggers --- litellm/proxy/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 923021efc2..1c794fbc8c 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -128,6 +128,11 @@ def log_to_opentelemetry(func): duration=0.0, start_time=start_time, end_time=end_time, + event_metadata={ + "function_name": func.__name__, + "function_kwargs": kwargs, + "function_args": args, + }, ) elif ( # in litellm custom callbacks kwargs is passed as arg[0] @@ -170,6 +175,11 @@ def log_to_opentelemetry(func): duration=0.0, start_time=start_time, end_time=end_time, + event_metadata={ + "function_name": func.__name__, + "function_kwargs": kwargs, + "function_args": args, + }, ) raise e From 09105277cd058c5a1bc71e41764247cf5ffa160a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 5 Aug 2024 20:00:06 -0700 Subject: [PATCH 2/7] use otel callbacks --- litellm/proxy/proxy_config.yaml | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 0750a39376..50e8bcd623 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -26,18 +26,6 @@ model_list: model_info: mode: audio_speech -# For /fine_tuning/jobs endpoints -finetune_settings: - - custom_llm_provider: azure - api_base: https://exampleopenaiendpoint-production.up.railway.app - api_key: fake-key - api_version: "2023-03-15-preview" - - custom_llm_provider: openai - api_key: os.environ/OPENAI_API_KEY - - custom_llm_provider: "vertex_ai" - vertex_project: "adroit-crow-413218" - vertex_location: "us-central1" - vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json" # for /files endpoints files_settings: @@ -48,18 +36,11 @@ files_settings: - custom_llm_provider: openai api_key: os.environ/OPENAI_API_KEY -default_vertex_config: - vertex_project: "adroit-crow-413218" - vertex_location: "us-central1" - vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json" - general_settings: master_key: sk-1234 - # Security controls - max_request_size_mb: 100 - # google cloud run maximum repsonses size is 32MB - max_response_size_mb: 10 +litellm_settings: + callbacks: ["otel"] # 👈 KEY CHANGE \ No newline at end of file From 05a2a4462ea7b5c3bb010dd23899c5810c80b8a9 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 5 Aug 2024 20:01:58 -0700 Subject: [PATCH 3/7] otel log event_metadata --- litellm/integrations/opentelemetry.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index a82380432c..72031e58c9 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -153,6 +153,11 @@ class OpenTelemetry(CustomLogger): if event_metadata: for key, value in event_metadata.items(): + if isinstance(value, dict): + try: + value = str(value) + except Exception: + value = "litllm logging error - could_not_json_serialize" service_logging_span.set_attribute(key, value) service_logging_span.set_status(Status(StatusCode.OK)) service_logging_span.end(end_time=_end_time_ns) @@ -163,6 +168,7 @@ class OpenTelemetry(CustomLogger): parent_otel_span: Optional[Span] = None, start_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[float, datetime]] = None, + event_metadata: Optional[dict] = None, ): from datetime import datetime @@ -193,6 +199,15 @@ class OpenTelemetry(CustomLogger): service_logging_span.set_attribute( key="service", value=payload.service.value ) + if event_metadata: + for key, value in event_metadata.items(): + if isinstance(value, dict): + try: + value = str(value) + except Exception: + value = "litllm logging error - could_not_json_serialize" + service_logging_span.set_attribute(key, value) + service_logging_span.set_status(Status(StatusCode.ERROR)) service_logging_span.end(end_time=_end_time_ns) From 8d91112726262976ba325ef989389f6160769307 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 5 Aug 2024 20:03:34 -0700 Subject: [PATCH 4/7] log event_metadata on otel --- litellm/_service_logger.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/_service_logger.py b/litellm/_service_logger.py index 5d9ec74056..fbdf6f1259 100644 --- a/litellm/_service_logger.py +++ b/litellm/_service_logger.py @@ -97,6 +97,7 @@ class ServiceLogging(CustomLogger): parent_otel_span: Optional[Span] = None, start_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[float, datetime]] = None, + event_metadata: Optional[dict] = None, ): """ - For counting if the redis, postgres call is unsuccessful @@ -133,6 +134,7 @@ class ServiceLogging(CustomLogger): parent_otel_span=parent_otel_span, start_time=start_time, end_time=end_time, + event_metadata=event_metadata, ) async def async_post_call_failure_hook( From bec0350127b21eb6b8aec25760d03be6b2b46891 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 5 Aug 2024 20:18:40 -0700 Subject: [PATCH 5/7] otel fix async_service_failure_hook --- litellm/proxy/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 1c794fbc8c..fca71758e4 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -172,6 +172,7 @@ def log_to_opentelemetry(func): error=e, service=ServiceTypes.DB, call_type=func.__name__, + parent_otel_span=kwargs["parent_otel_span"], duration=0.0, start_time=start_time, end_time=end_time, From 6e4098fa26c261b64ec3c01dce19531d99757c70 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 5 Aug 2024 20:21:26 -0700 Subject: [PATCH 6/7] otel log service errors --- litellm/integrations/opentelemetry.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index 72031e58c9..379c418807 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -165,6 +165,7 @@ class OpenTelemetry(CustomLogger): async def async_service_failure_hook( self, payload: ServiceLoggerPayload, + error: Optional[str] = "", parent_otel_span: Optional[Span] = None, start_time: Optional[Union[datetime, float]] = None, end_time: Optional[Union[float, datetime]] = None, @@ -199,6 +200,8 @@ class OpenTelemetry(CustomLogger): service_logging_span.set_attribute( key="service", value=payload.service.value ) + if error: + service_logging_span.set_attribute(key="error", value=error) if event_metadata: for key, value in event_metadata.items(): if isinstance(value, dict): From f55a0d98f3be1f7d66785005a372913e8d4e052e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 5 Aug 2024 20:23:02 -0700 Subject: [PATCH 7/7] otel log failures --- litellm/_service_logger.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/litellm/_service_logger.py b/litellm/_service_logger.py index fbdf6f1259..da0c99aac3 100644 --- a/litellm/_service_logger.py +++ b/litellm/_service_logger.py @@ -128,13 +128,16 @@ class ServiceLogging(CustomLogger): from litellm.proxy.proxy_server import open_telemetry_logger - if parent_otel_span is not None and open_telemetry_logger is not None: + if not isinstance(error, str): + error = str(error) + if open_telemetry_logger is not None: await open_telemetry_logger.async_service_failure_hook( payload=payload, parent_otel_span=parent_otel_span, start_time=start_time, end_time=end_time, event_metadata=event_metadata, + error=error, ) async def async_post_call_failure_hook(