mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-28 04:04:31 +00:00
(litellm sdk - perf improvement) - optimize pre_call_check
(#7673)
* latency fix - litellm sdk * fix linting error * fix litellm logging
This commit is contained in:
parent
9e2b1101c0
commit
1c04ae6002
1 changed files with 94 additions and 48 deletions
|
@@ -4,6 +4,7 @@
|
||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
@@ -455,55 +456,11 @@ class Logging(LiteLLMLoggingBaseClass):
|
||||||
)
|
)
|
||||||
|
|
||||||
# User Logging -> if you pass in a custom logging function
|
# User Logging -> if you pass in a custom logging function
|
||||||
headers = additional_args.get("headers", {})
|
self._print_llm_call_debugging_log(
|
||||||
if headers is None:
|
api_base=additional_args.get("api_base", ""),
|
||||||
headers = {}
|
headers=additional_args.get("headers", {}),
|
||||||
data = additional_args.get("complete_input_dict", {})
|
additional_args=additional_args,
|
||||||
api_base = str(additional_args.get("api_base", ""))
|
|
||||||
query_params = additional_args.get("query_params", {})
|
|
||||||
if "key=" in api_base:
|
|
||||||
# Find the position of "key=" in the string
|
|
||||||
key_index = api_base.find("key=") + 4
|
|
||||||
# Mask the last 5 characters after "key="
|
|
||||||
masked_api_base = api_base[:key_index] + "*" * 5 + api_base[-4:]
|
|
||||||
else:
|
|
||||||
masked_api_base = api_base
|
|
||||||
self.model_call_details["litellm_params"]["api_base"] = masked_api_base
|
|
||||||
masked_headers = {
|
|
||||||
k: (
|
|
||||||
(v[:-44] + "*" * 44)
|
|
||||||
if (isinstance(v, str) and len(v) > 44)
|
|
||||||
else "*****"
|
|
||||||
)
|
|
||||||
for k, v in headers.items()
|
|
||||||
}
|
|
||||||
formatted_headers = " ".join(
|
|
||||||
[f"-H '{k}: {v}'" for k, v in masked_headers.items()]
|
|
||||||
)
|
)
|
||||||
|
|
||||||
verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}")
|
|
||||||
|
|
||||||
curl_command = "\n\nPOST Request Sent from LiteLLM:\n"
|
|
||||||
curl_command += "curl -X POST \\\n"
|
|
||||||
curl_command += f"{api_base} \\\n"
|
|
||||||
curl_command += (
|
|
||||||
f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else ""
|
|
||||||
)
|
|
||||||
curl_command += f"-d '{str(data)}'\n"
|
|
||||||
if additional_args.get("request_str", None) is not None:
|
|
||||||
# print the sagemaker / bedrock client request
|
|
||||||
curl_command = "\nRequest Sent from LiteLLM:\n"
|
|
||||||
curl_command += additional_args.get("request_str", None)
|
|
||||||
elif api_base == "":
|
|
||||||
curl_command = self.model_call_details
|
|
||||||
|
|
||||||
if json_logs:
|
|
||||||
verbose_logger.debug(
|
|
||||||
"POST Request Sent from LiteLLM",
|
|
||||||
extra={"api_base": {api_base}, **masked_headers},
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
print_verbose(f"\033[92m{curl_command}\033[0m\n", log_level="DEBUG")
|
|
||||||
# log raw request to provider (like LangFuse) -- if opted in.
|
# log raw request to provider (like LangFuse) -- if opted in.
|
||||||
if log_raw_request_response is True:
|
if log_raw_request_response is True:
|
||||||
_litellm_params = self.model_call_details.get("litellm_params", {})
|
_litellm_params = self.model_call_details.get("litellm_params", {})
|
||||||
|
@@ -519,6 +476,12 @@ class Logging(LiteLLMLoggingBaseClass):
|
||||||
'litellm.turn_off_message_logging=True'"
|
'litellm.turn_off_message_logging=True'"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
curl_command = self._get_request_curl_command(
|
||||||
|
api_base=additional_args.get("api_base", ""),
|
||||||
|
headers=additional_args.get("headers", {}),
|
||||||
|
additional_args=additional_args,
|
||||||
|
data=additional_args.get("complete_input_dict", {}),
|
||||||
|
)
|
||||||
_metadata["raw_request"] = str(curl_command)
|
_metadata["raw_request"] = str(curl_command)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_metadata["raw_request"] = (
|
_metadata["raw_request"] = (
|
||||||
|
@@ -612,6 +575,89 @@ class Logging(LiteLLMLoggingBaseClass):
|
||||||
if capture_exception: # log this error to sentry for debugging
|
if capture_exception: # log this error to sentry for debugging
|
||||||
capture_exception(e)
|
capture_exception(e)
|
||||||
|
|
||||||
|
def _print_llm_call_debugging_log(
    self,
    api_base: str,
    headers: dict,
    additional_args: dict,
) -> None:
    """
    Internal debugging helper function.

    Prints the RAW curl command sent from LiteLLM. All string-formatting
    work is guarded behind the debug-level check so no cost is paid on the
    hot path when debug logging is disabled (the point of this refactor).

    Args:
        api_base: Target API base URL of the outgoing request.
        headers: Request headers to log (masked before emission).
        additional_args: Pre-call kwargs; may carry "headers", "api_base",
            "complete_input_dict", and "request_str" overrides.
    """
    if verbose_logger.isEnabledFor(logging.DEBUG) or litellm.set_verbose is True:
        if json_logs:
            masked_headers = self._get_masked_headers(headers)
            verbose_logger.debug(
                "POST Request Sent from LiteLLM",
                # BUGFIX: original wrote {api_base} — a one-element *set*
                # literal — as the logged value; log the plain string.
                extra={"api_base": api_base, **masked_headers},
            )
        else:
            # Non-JSON path re-reads everything from additional_args,
            # matching the pre-refactor behavior of pre_call.
            headers = additional_args.get("headers", {})
            if headers is None:
                headers = {}
            data = additional_args.get("complete_input_dict", {})
            api_base = str(additional_args.get("api_base", ""))
            if "key=" in api_base:
                # Find the position of "key=" in the string
                key_index = api_base.find("key=") + 4
                # Mask the last 5 characters after "key="
                masked_api_base = api_base[:key_index] + "*" * 5 + api_base[-4:]
            else:
                masked_api_base = api_base
            # Stored so downstream loggers never see the raw key in the URL.
            self.model_call_details["litellm_params"]["api_base"] = masked_api_base

            # Lazy %-formatting: args only rendered if the record is emitted.
            verbose_logger.debug(
                "PRE-API-CALL ADDITIONAL ARGS: %s", additional_args
            )

            curl_command = self._get_request_curl_command(
                api_base=api_base,
                headers=headers,
                additional_args=additional_args,
                data=data,
            )
            # ANSI green so the curl command stands out in terminal output.
            verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")
||||||
|
|
||||||
|
def _get_request_curl_command(
    self, api_base: str, headers: dict, additional_args: dict, data: dict
) -> str:
    """
    Render the outgoing request as a copy-pasteable curl command.

    Returns a pre-built request string when the provider client supplies
    one ("request_str" — sagemaker / bedrock), or the raw model_call_details
    when no api_base is known; otherwise assembles a curl invocation with
    masked headers.
    """
    request_str = additional_args.get("request_str", None)
    if request_str is not None:
        # print the sagemaker / bedrock client request
        return "\nRequest Sent from LiteLLM:\n" + request_str
    if api_base == "":
        return str(self.model_call_details)

    formatted_headers = " ".join(
        f"-H '{name}: {value}'"
        for name, value in self._get_masked_headers(headers).items()
    )
    pieces = [
        "\n\nPOST Request Sent from LiteLLM:\n",
        "curl -X POST \\\n",
        f"{api_base} \\\n",
    ]
    if formatted_headers.strip() != "":
        pieces.append(f"{formatted_headers} \\\n")
    pieces.append(f"-d '{str(data)}'\n")
    return "".join(pieces)
|
||||||
|
|
||||||
|
def _get_masked_headers(self, headers: dict):
|
||||||
|
"""
|
||||||
|
Internal debugging helper function
|
||||||
|
|
||||||
|
Masks the headers of the request sent from LiteLLM
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
k: (
|
||||||
|
(v[:-44] + "*" * 44)
|
||||||
|
if (isinstance(v, str) and len(v) > 44)
|
||||||
|
else "*****"
|
||||||
|
)
|
||||||
|
for k, v in headers.items()
|
||||||
|
}
|
||||||
|
|
||||||
def post_call(
|
def post_call(
|
||||||
self, original_response, input=None, api_key=None, additional_args={}
|
self, original_response, input=None, api_key=None, additional_args={}
|
||||||
):
|
):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue