fix(main.py): ignore max_parallel_requests as a litellm param

2024-04-20 12:15:04 -07:00 · 2024-04-20 12:15:04 -07:00 · 26579303e0
commit 26579303e0
parent 1507b23e30
1 changed file with 4 additions and 0 deletions
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -609,6 +609,7 @@ def completion(
        "client",
        "rpm",
        "tpm",
+        "max_parallel_requests",
        "input_cost_per_token",
        "output_cost_per_token",
        "input_cost_per_second",
@@ -2560,6 +2561,7 @@ def embedding(
    client = kwargs.pop("client", None)
    rpm = kwargs.pop("rpm", None)
    tpm = kwargs.pop("tpm", None)
+    max_parallel_requests = kwargs.pop("max_parallel_requests", None)
    model_info = kwargs.get("model_info", None)
    metadata = kwargs.get("metadata", None)
    encoding_format = kwargs.get("encoding_format", None)
@@ -2617,6 +2619,7 @@ def embedding(
        "client",
        "rpm",
        "tpm",
+        "max_parallel_requests",
        "input_cost_per_token",
        "output_cost_per_token",
        "input_cost_per_second",
@@ -3476,6 +3479,7 @@ def image_generation(
            "client",
            "rpm",
            "tpm",
+            "max_parallel_requests",
            "input_cost_per_token",
            "output_cost_per_token",
            "hf_model_name",