fix service logger for OTEL

This commit is contained in:
Ishaan Jaff 2024-06-06 22:12:45 -07:00
parent c867f88c57
commit 312521a0b3
3 changed files with 56 additions and 5 deletions

View file

@ -4,7 +4,9 @@ from .types.services import ServiceTypes, ServiceLoggerPayload
from .integrations.prometheus_services import PrometheusServicesLogger from .integrations.prometheus_services import PrometheusServicesLogger
from .integrations.custom_logger import CustomLogger from .integrations.custom_logger import CustomLogger
from datetime import timedelta from datetime import timedelta
from typing import Union from typing import Union, Optional
from opentelemetry.trace import Span
from datetime import datetime
class ServiceLogging(CustomLogger): class ServiceLogging(CustomLogger):
@ -40,7 +42,13 @@ class ServiceLogging(CustomLogger):
self.mock_testing_sync_failure_hook += 1 self.mock_testing_sync_failure_hook += 1
async def async_service_success_hook( async def async_service_success_hook(
self, service: ServiceTypes, duration: float, call_type: str self,
service: ServiceTypes,
call_type: str,
duration: float,
parent_otel_span: Optional[Span] = None,
start_time: Optional[datetime] = None,
end_time: Optional[datetime] = None,
): ):
""" """
- For counting if the redis, postgres call is successful - For counting if the redis, postgres call is successful
@ -61,6 +69,16 @@ class ServiceLogging(CustomLogger):
payload=payload payload=payload
) )
from litellm.proxy.proxy_server import open_telemetry_logger
if parent_otel_span is not None and open_telemetry_logger is not None:
await open_telemetry_logger.async_service_success_hook(
payload=payload,
parent_otel_span=parent_otel_span,
start_time=start_time,
end_time=end_time,
)
async def async_service_failure_hook( async def async_service_failure_hook(
self, self,
service: ServiceTypes, service: ServiceTypes,

View file

@ -4,6 +4,9 @@ from dataclasses import dataclass
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
from opentelemetry.trace import Span
from datetime import datetime
LITELLM_TRACER_NAME = "litellm" LITELLM_TRACER_NAME = "litellm"
LITELLM_RESOURCE = {"service.name": "litellm"} LITELLM_RESOURCE = {"service.name": "litellm"}
@ -74,6 +77,30 @@ class OpenTelemetry(CustomLogger):
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
self._handle_failure(kwargs, response_obj, start_time, end_time) self._handle_failure(kwargs, response_obj, start_time, end_time)
async def async_service_success_hook(
self,
payload: ServiceLoggerPayload,
parent_otel_span: Optional[Span] = None,
start_time: Optional[datetime] = None,
end_time: Optional[datetime] = None,
):
from opentelemetry import trace
from datetime import datetime
if parent_otel_span is not None:
_span_name = payload.service
service_logging_span = self.tracer.start_span(
name=_span_name,
context=trace.set_span_in_context(parent_otel_span),
start_time=self._to_ns(start_time),
)
service_logging_span.set_attribute(key="call_type", value=payload.call_type)
service_logging_span.set_attribute(
key="service", value=payload.service.value
)
service_logging_span.end(end_time=self._to_ns(end_time))
parent_otel_span.end(end_time=self._to_ns(datetime.now()))
def _handle_sucess(self, kwargs, response_obj, start_time, end_time): def _handle_sucess(self, kwargs, response_obj, start_time, end_time):
from opentelemetry.trace import Status, StatusCode from opentelemetry.trace import Status, StatusCode

View file

@ -104,6 +104,7 @@ from litellm.proxy.utils import (
update_spend, update_spend,
encrypt_value, encrypt_value,
decrypt_value, decrypt_value,
_to_ns,
) )
from litellm import ( from litellm import (
CreateBatchRequest, CreateBatchRequest,
@ -399,7 +400,7 @@ disable_spend_logs = False
jwt_handler = JWTHandler() jwt_handler = JWTHandler()
prompt_injection_detection_obj: Optional[_OPTIONAL_PromptInjectionDetection] = None prompt_injection_detection_obj: Optional[_OPTIONAL_PromptInjectionDetection] = None
store_model_in_db: bool = False store_model_in_db: bool = False
open_telemetry_logger = None open_telemetry_logger: Optional[litellm.integrations.opentelemetry.OpenTelemetry] = None
### INITIALIZE GLOBAL LOGGING OBJECT ### ### INITIALIZE GLOBAL LOGGING OBJECT ###
proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache) proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
### REDIS QUEUE ### ### REDIS QUEUE ###
@ -495,7 +496,7 @@ async def check_request_disconnection(request: Request, llm_api_call_task):
async def user_api_key_auth( async def user_api_key_auth(
request: Request, api_key: str = fastapi.Security(api_key_header) request: Request, api_key: str = fastapi.Security(api_key_header)
) -> UserAPIKeyAuth: ) -> UserAPIKeyAuth:
global master_key, prisma_client, llm_model_list, user_custom_auth, custom_db_client, general_settings global master_key, prisma_client, llm_model_list, user_custom_auth, custom_db_client, general_settings, proxy_logging_obj
try: try:
if isinstance(api_key, str): if isinstance(api_key, str):
passed_in_key = api_key passed_in_key = api_key
@ -504,7 +505,7 @@ async def user_api_key_auth(
if open_telemetry_logger is not None: if open_telemetry_logger is not None:
parent_otel_span = open_telemetry_logger.tracer.start_span( parent_otel_span = open_telemetry_logger.tracer.start_span(
name="Received Proxy Server Request", name="Received Proxy Server Request",
start_time=time.time(), start_time=_to_ns(datetime.now()),
) )
### USER-DEFINED AUTH FUNCTION ### ### USER-DEFINED AUTH FUNCTION ###
if user_custom_auth is not None: if user_custom_auth is not None:
@ -588,6 +589,7 @@ async def user_api_key_auth(
prisma_client=prisma_client, prisma_client=prisma_client,
user_api_key_cache=user_api_key_cache, user_api_key_cache=user_api_key_cache,
parent_otel_span=parent_otel_span, parent_otel_span=parent_otel_span,
proxy_logging_obj=proxy_logging_obj,
) )
# [OPTIONAL] track spend for an org id - `LiteLLM_OrganizationTable` # [OPTIONAL] track spend for an org id - `LiteLLM_OrganizationTable`
@ -600,6 +602,7 @@ async def user_api_key_auth(
prisma_client=prisma_client, prisma_client=prisma_client,
user_api_key_cache=user_api_key_cache, user_api_key_cache=user_api_key_cache,
parent_otel_span=parent_otel_span, parent_otel_span=parent_otel_span,
proxy_logging_obj=proxy_logging_obj,
) )
# [OPTIONAL] track spend against an internal employee - `LiteLLM_UserTable` # [OPTIONAL] track spend against an internal employee - `LiteLLM_UserTable`
user_object = None user_object = None
@ -614,6 +617,7 @@ async def user_api_key_auth(
user_api_key_cache=user_api_key_cache, user_api_key_cache=user_api_key_cache,
user_id_upsert=jwt_handler.is_upsert_user_id(), user_id_upsert=jwt_handler.is_upsert_user_id(),
parent_otel_span=parent_otel_span, parent_otel_span=parent_otel_span,
proxy_logging_obj=proxy_logging_obj,
) )
# [OPTIONAL] track spend against an external user - `LiteLLM_EndUserTable` # [OPTIONAL] track spend against an external user - `LiteLLM_EndUserTable`
@ -628,6 +632,7 @@ async def user_api_key_auth(
prisma_client=prisma_client, prisma_client=prisma_client,
user_api_key_cache=user_api_key_cache, user_api_key_cache=user_api_key_cache,
parent_otel_span=parent_otel_span, parent_otel_span=parent_otel_span,
proxy_logging_obj=proxy_logging_obj,
) )
global_proxy_spend = None global_proxy_spend = None
@ -727,6 +732,7 @@ async def user_api_key_auth(
prisma_client=prisma_client, prisma_client=prisma_client,
user_api_key_cache=user_api_key_cache, user_api_key_cache=user_api_key_cache,
parent_otel_span=parent_otel_span, parent_otel_span=parent_otel_span,
proxy_logging_obj=proxy_logging_obj,
) )
if _end_user_object is not None: if _end_user_object is not None:
end_user_params["allowed_model_region"] = ( end_user_params["allowed_model_region"] = (