From 3a2cb151aa637691dddc01657c52c042db2ea318 Mon Sep 17 00:00:00 2001
From: Marc Abramowitz <abramowi@adobe.com>
Date: Wed, 10 Jul 2024 16:05:37 -0700
Subject: [PATCH] Proxy: Add `x-litellm-call-id` response header

The header carries the value of `logging_obj.litellm_call_id`. One particular
use is to correlate the HTTP response to a request with the corresponding trace
in an LLM logging tool such as Langfuse or Langsmith.

For example, if a user in my environment (which uses Langfuse) gets this back
in the response headers:

```
x-litellm-call-id: ffcb49e7-bd6e-4e56-9c08-a7243802b26e
```

then they know that they can see the trace for this request in Langfuse by
visiting https://langfuse.domain.com/trace/ffcb49e7-bd6e-4e56-9c08-a7243802b26e

They can also use this ID to submit scores for this request to the Langfuse
scoring API.
---
 litellm/proxy/proxy_server.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index a600ac018..df5acdeec 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -444,6 +444,7 @@ def _get_pydantic_json_dict(pydantic_obj: BaseModel) -> dict:
 def get_custom_headers(
     *,
     user_api_key_dict: UserAPIKeyAuth,
+    call_id: Optional[str] = None,
     model_id: Optional[str] = None,
     cache_key: Optional[str] = None,
     api_base: Optional[str] = None,
@@ -455,6 +456,7 @@ def get_custom_headers(
 ) -> dict:
     exclude_values = {"", None}
     headers = {
+        "x-litellm-call-id": call_id,
         "x-litellm-model-id": model_id,
         "x-litellm-cache-key": cache_key,
         "x-litellm-model-api-base": api_base,
@@ -2895,6 +2897,7 @@ async def chat_completion(
         ):  # use generate_responses to stream responses
             custom_headers = get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
+                call_id=logging_obj.litellm_call_id,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -2925,6 +2928,7 @@ async def chat_completion(
         fastapi_response.headers.update(
             get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
+                call_id=logging_obj.litellm_call_id,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -3138,6 +3142,7 @@ async def completion(
         ):  # use generate_responses to stream responses
             custom_headers = get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
+                call_id=logging_obj.litellm_call_id,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -3158,6 +3163,7 @@ async def completion(
         fastapi_response.headers.update(
             get_custom_headers(
                 user_api_key_dict=user_api_key_dict,
+                call_id=logging_obj.litellm_call_id,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,