diff --git a/litellm/main.py b/litellm/main.py index 65696b3c0..87942f704 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -609,6 +609,7 @@ def completion( "client", "rpm", "tpm", + "max_parallel_requests", "input_cost_per_token", "output_cost_per_token", "input_cost_per_second", @@ -2560,6 +2561,7 @@ def embedding( client = kwargs.pop("client", None) rpm = kwargs.pop("rpm", None) tpm = kwargs.pop("tpm", None) + max_parallel_requests = kwargs.pop("max_parallel_requests", None) model_info = kwargs.get("model_info", None) metadata = kwargs.get("metadata", None) encoding_format = kwargs.get("encoding_format", None) @@ -2617,6 +2619,7 @@ def embedding( "client", "rpm", "tpm", + "max_parallel_requests", "input_cost_per_token", "output_cost_per_token", "input_cost_per_second", @@ -3476,6 +3479,7 @@ def image_generation( "client", "rpm", "tpm", + "max_parallel_requests", "input_cost_per_token", "output_cost_per_token", "hf_model_name",