diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 49d3fbc88c..f2d59b15fb 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2800,7 +2800,9 @@ async def chat_completion( ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse. - data["litellm_call_id"] = request.headers.get('x-litellm-call-id', str(uuid.uuid4())) + data["litellm_call_id"] = request.headers.get( + "x-litellm-call-id", str(uuid.uuid4()) + ) logging_obj, data = litellm.utils.function_setup( original_function="acompletion", rules_obj=litellm.utils.Rules(), @@ -3137,6 +3139,7 @@ async def completion( cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" response_cost = hidden_params.get("response_cost", None) or "" + litellm_call_id = hidden_params.get("litellm_call_id", None) or "" ### ALERTING ### asyncio.create_task( @@ -3151,7 +3154,7 @@ async def completion( ): # use generate_responses to stream responses custom_headers = get_custom_headers( user_api_key_dict=user_api_key_dict, - call_id=logging_obj.litellm_call_id, + call_id=litellm_call_id, model_id=model_id, cache_key=cache_key, api_base=api_base, @@ -3172,7 +3175,7 @@ async def completion( fastapi_response.headers.update( get_custom_headers( user_api_key_dict=user_api_key_dict, - call_id=logging_obj.litellm_call_id, + call_id=litellm_call_id, model_id=model_id, cache_key=cache_key, api_base=api_base, @@ -3391,6 +3394,7 @@ async def embeddings( cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" response_cost = hidden_params.get("response_cost", None) or "" + litellm_call_id = hidden_params.get("litellm_call_id", None) or "" fastapi_response.headers.update( get_custom_headers( @@ -3401,6 +3405,7 @@ async def embeddings( version=version, response_cost=response_cost, model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + call_id=litellm_call_id, ) ) @@ -3545,6 +3550,7 @@ async def image_generation( cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" response_cost = hidden_params.get("response_cost", None) or "" + litellm_call_id = hidden_params.get("litellm_call_id", None) or "" fastapi_response.headers.update( get_custom_headers( @@ -3555,6 +3561,7 @@ async def image_generation( version=version, response_cost=response_cost, model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + call_id=litellm_call_id, ) ) @@ -3686,6 +3693,7 @@ async def audio_speech( cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" response_cost = hidden_params.get("response_cost", None) or "" + litellm_call_id = hidden_params.get("litellm_call_id", None) or "" # Printing each chunk size async def generate(_response: HttpxBinaryResponseContent): @@ -3702,6 +3710,7 @@ async def audio_speech( response_cost=response_cost, model_region=getattr(user_api_key_dict, "allowed_model_region", ""), fastest_response_batch_completion=None, + call_id=litellm_call_id, ) selected_data_generator = select_data_generator( @@ -3856,6 +3865,7 @@ async def audio_transcriptions( cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" response_cost = hidden_params.get("response_cost", None) or "" + litellm_call_id = hidden_params.get("litellm_call_id", None) or "" fastapi_response.headers.update( get_custom_headers( @@ -3866,6 +3876,7 @@ async def audio_transcriptions( version=version, response_cost=response_cost, model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + call_id=litellm_call_id, ) ) diff --git a/litellm/utils.py b/litellm/utils.py index 65b4d63ebe..03ed52b5c0 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1332,6 +1332,9 @@ def client(original_function): # ADD HIDDEN PARAMS - additional call metadata if hasattr(result, "_hidden_params"): + result._hidden_params["litellm_call_id"] = getattr( + logging_obj, "litellm_call_id", None + ) result._hidden_params["model_id"] = kwargs.get("model_info", {}).get( "id", None )