From 3e62f90b439626e331cb1d5718668e37424c8839 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 1 Dec 2023 21:02:39 -0800
Subject: [PATCH] (feat) proxy: add OTEL logging input/output

---
 litellm/proxy/proxy_server.py | 45 +++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 255d5389a..f0091866f 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -91,7 +91,7 @@ def generate_feedback_box():
 import litellm
 from litellm.caching import DualCache
 litellm.suppress_debug_info = True
-from fastapi import FastAPI, Request, HTTPException, status, Depends
+from fastapi import FastAPI, Request, HTTPException, status, Depends, BackgroundTasks
 from fastapi.routing import APIRouter
 from fastapi.encoders import jsonable_encoder
 from fastapi.responses import StreamingResponse, FileResponse, ORJSONResponse
@@ -113,7 +113,44 @@ app.add_middleware(
     allow_methods=["*"],
     allow_headers=["*"],
 )
+def log_input_output(request, response):
+    from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
+    from opentelemetry import trace
+    from opentelemetry.sdk.trace import TracerProvider
+    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
+    from opentelemetry.sdk.resources import Resource
+    # Initialize OpenTelemetry components
+    otlp_exporter = OTLPSpanExporter(endpoint="localhost:4317", insecure=True)
+    resource = Resource.create({
+        "service.name": "my_app",
+    })
+    trace.set_tracer_provider(TracerProvider(resource=resource))
+    tracer = trace.get_tracer(__name__)
+    span_processor = SimpleSpanProcessor(otlp_exporter)
+    trace.get_tracer_provider().add_span_processor(span_processor)
+    with tracer.start_as_current_span("litellm-completion") as current_span:
+        input_event_attributes = {f"{key}": str(value) for key, value in dict(request).items() if value is not None}
+        # Log the input event with attributes
+        current_span.add_event(
+            name="LiteLLM: Request Input",
+            attributes=input_event_attributes
+        )
+        event_headers = {f"{key}": str(value) for key, value in dict(request.headers).items() if value is not None}
+        current_span.add_event(
+            name="LiteLLM: Request Headers",
+            attributes=event_headers
+        )
+
+        input_event_attributes.update(event_headers)
+
+        input_event_attributes.update({f"{key}": str(value) for key, value in dict(response).items()})
+        current_span.add_event(
+            name="LiteLLM: Request Output",
+            attributes=input_event_attributes
+        )
+        return True
 
 from typing import Dict
 
 from pydantic import BaseModel
@@ -796,12 +833,11 @@ async def completion(request: Request, model: Optional[str] = None, user_api_key
 
 @router.post("/v1/chat/completions", dependencies=[Depends(user_api_key_auth)], tags=["chat/completions"])
 @router.post("/chat/completions", dependencies=[Depends(user_api_key_auth)], tags=["chat/completions"])
 @router.post("/openai/deployments/{model:path}/chat/completions", dependencies=[Depends(user_api_key_auth)], tags=["chat/completions"]) # azure compatible endpoint
-async def chat_completion(request: ProxyChatCompletionRequest, model: Optional[str] = None, user_api_key_dict: dict = Depends(user_api_key_auth)):
+async def chat_completion(request: Request, model: Optional[str] = None, user_api_key_dict: dict = Depends(user_api_key_auth), background_tasks: BackgroundTasks = None):
     global general_settings, user_debug
     try:
         data = {}
-        request_items = request.dict() # type: ignore
-        data = {key: value for key, value in request_items.items() if value is not None} # pydantic sets all values to None, filter out None values here
+        data = await request.json() # type: ignore
         print_verbose(f"receiving data: {data}")
         data["model"] = (
@@ -833,6 +869,7 @@ async def chat_completion(request: ProxyChatCompletionRequest, model: Optional[s
         response = await litellm.acompletion(**data)
         if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
                 return StreamingResponse(async_data_generator(response), media_type='text/event-stream')
+        background_tasks.add_task(log_input_output, request, response) # background task for logging to OTEL
         return response
     except Exception as e:
         print(f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`")
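
For readers unfamiliar with the OpenTelemetry calls used above, here is a minimal, self-contained sketch of the same tracer/span/event pattern (not part of the patch). It assumes only the opentelemetry-sdk package is installed and swaps the patch's OTLP/gRPC exporter (collector on localhost:4317) for ConsoleSpanExporter so spans print to stdout; the request/response payloads are hypothetical stand-ins for what the proxy would pass in.

# Minimal sketch of the span/event pattern used by log_input_output.
# Assumption: ConsoleSpanExporter replaces the patch's OTLPSpanExporter, so no collector is needed.
from opentelemetry import trace
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

provider = TracerProvider(resource=Resource.create({"service.name": "my_app"}))
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)
tracer = trace.get_tracer(__name__)

# Hypothetical stand-ins for the proxy's parsed request body and model response.
request_data = {"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]}
response_data = {"id": "chatcmpl-123", "choices": [{"message": {"content": "hello"}}]}

with tracer.start_as_current_span("litellm-completion") as current_span:
    # Events carry flattened string attributes, mirroring the patch's dict comprehensions.
    current_span.add_event(
        name="LiteLLM: Request Input",
        attributes={key: str(value) for key, value in request_data.items()},
    )
    current_span.add_event(
        name="LiteLLM: Request Output",
        attributes={key: str(value) for key, value in response_data.items()},
    )

Running this prints a single litellm-completion span with the two events attached, which is a quick way to check the attribute flattening before pointing OTLPSpanExporter at a real collector.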