From 3a2cb151aa637691dddc01657c52c042db2ea318 Mon Sep 17 00:00:00 2001 From: Marc Abramowitz Date: Wed, 10 Jul 2024 16:05:37 -0700 Subject: [PATCH 1/2] Proxy: Add `x-litellm-call-id` response header This gives the value of `logging_obj.litellm_call_id` and one particular use of this is to correlate the HTTP response from a request with a trace in an LLM logging tool like Langfuse, Langsmith, etc. For example, if a user in my environment (w/ Langfuse) gets back this in the response headers: ``` x-litellm-call-id: ffcb49e7-bd6e-4e56-9c08-a7243802b26e ``` then they know that they can see the trace for this request in Langfuse by visiting https://langfuse.domain.com/trace/ffcb49e7-bd6e-4e56-9c08-a7243802b26e They can also use this ID to submit scores for this request to the Langfuse scoring API. --- litellm/proxy/proxy_server.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index a600ac018..df5acdeec 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -444,6 +444,7 @@ def _get_pydantic_json_dict(pydantic_obj: BaseModel) -> dict: def get_custom_headers( *, user_api_key_dict: UserAPIKeyAuth, + call_id: Optional[str] = None, model_id: Optional[str] = None, cache_key: Optional[str] = None, api_base: Optional[str] = None, @@ -455,6 +456,7 @@ def get_custom_headers( ) -> dict: exclude_values = {"", None} headers = { + "x-litellm-call-id": call_id, "x-litellm-model-id": model_id, "x-litellm-cache-key": cache_key, "x-litellm-model-api-base": api_base, @@ -2895,6 +2897,7 @@ async def chat_completion( ): # use generate_responses to stream responses custom_headers = get_custom_headers( user_api_key_dict=user_api_key_dict, + call_id=logging_obj.litellm_call_id, model_id=model_id, cache_key=cache_key, api_base=api_base, @@ -2925,6 +2928,7 @@ async def chat_completion( fastapi_response.headers.update( get_custom_headers( user_api_key_dict=user_api_key_dict, + 
call_id=logging_obj.litellm_call_id, model_id=model_id, cache_key=cache_key, api_base=api_base, @@ -3138,6 +3142,7 @@ async def completion( ): # use generate_responses to stream responses custom_headers = get_custom_headers( user_api_key_dict=user_api_key_dict, + call_id=logging_obj.litellm_call_id, model_id=model_id, cache_key=cache_key, api_base=api_base, @@ -3158,6 +3163,7 @@ async def completion( fastapi_response.headers.update( get_custom_headers( user_api_key_dict=user_api_key_dict, + call_id=logging_obj.litellm_call_id, model_id=model_id, cache_key=cache_key, api_base=api_base, From 982603714e20285ca1f33dabc4f1b4f434936de6 Mon Sep 17 00:00:00 2001 From: Marc Abramowitz Date: Wed, 10 Jul 2024 16:30:52 -0700 Subject: [PATCH 2/2] Add docs --- docs/my-website/docs/proxy/logging.md | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md index c2f583366..4ae3ab977 100644 --- a/docs/my-website/docs/proxy/logging.md +++ b/docs/my-website/docs/proxy/logging.md @@ -22,6 +22,39 @@ Log Proxy Input, Output, Exceptions using Langfuse, OpenTelemetry, Custom Callba - [Logging to Athina](#logging-proxy-inputoutput-athina) - [(BETA) Moderation with Azure Content-Safety](#moderation-with-azure-content-safety) +## Getting the LiteLLM Call ID + +LiteLLM generates a unique `call_id` for each request. This `call_id` can be +used to track the request across the system. This can be very useful for finding +the info for a particular request in a logging system like one of the systems +mentioned on this page. 
+ +```shell +curl -i -sSL --location 'http://0.0.0.0:4000/chat/completions' \ + --header 'Authorization: Bearer sk-1234' \ + --header 'Content-Type: application/json' \ + --data '{ + "model": "gpt-3.5-turbo", + "messages": [{"role": "user", "content": "what llm are you"}] + }' | grep 'x-litellm' +``` + +The output of this is: + +```output +x-litellm-call-id: b980db26-9512-45cc-b1da-c511a363b83f +x-litellm-model-id: cb41bc03f4c33d310019bae8c5afdb1af0a8f97b36a234405a9807614988457c +x-litellm-model-api-base: https://x-example-1234.openai.azure.com +x-litellm-version: 1.40.21 +x-litellm-response-cost: 2.85e-05 +x-litellm-key-tpm-limit: None +x-litellm-key-rpm-limit: None +``` + +A number of these headers could be useful for troubleshooting, but the +`x-litellm-call-id` is the one that is most useful for tracking a request across +components in your system, including in logging tools. + ## Logging Proxy Input/Output - Langfuse We will use the `--config` to set `litellm.success_callback = ["langfuse"]` this will log all successfull LLM calls to langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your environment