fix(main.py): ignore max_parallel_requests as a litellm param

This commit is contained in:
Krrish Dholakia 2024-04-20 12:15:04 -07:00
parent 1507b23e30
commit 26579303e0

View file

@@ -609,6 +609,7 @@ def completion(
"client", "client",
"rpm", "rpm",
"tpm", "tpm",
"max_parallel_requests",
"input_cost_per_token", "input_cost_per_token",
"output_cost_per_token", "output_cost_per_token",
"input_cost_per_second", "input_cost_per_second",
@@ -2560,6 +2561,7 @@ def embedding(
client = kwargs.pop("client", None) client = kwargs.pop("client", None)
rpm = kwargs.pop("rpm", None) rpm = kwargs.pop("rpm", None)
tpm = kwargs.pop("tpm", None) tpm = kwargs.pop("tpm", None)
max_parallel_requests = kwargs.pop("max_parallel_requests", None)
model_info = kwargs.get("model_info", None) model_info = kwargs.get("model_info", None)
metadata = kwargs.get("metadata", None) metadata = kwargs.get("metadata", None)
encoding_format = kwargs.get("encoding_format", None) encoding_format = kwargs.get("encoding_format", None)
@@ -2617,6 +2619,7 @@ def embedding(
"client", "client",
"rpm", "rpm",
"tpm", "tpm",
"max_parallel_requests",
"input_cost_per_token", "input_cost_per_token",
"output_cost_per_token", "output_cost_per_token",
"input_cost_per_second", "input_cost_per_second",
@@ -3476,6 +3479,7 @@ def image_generation(
"client", "client",
"rpm", "rpm",
"tpm", "tpm",
"max_parallel_requests",
"input_cost_per_token", "input_cost_per_token",
"output_cost_per_token", "output_cost_per_token",
"hf_model_name", "hf_model_name",