Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)
fixing optional param mapping
parent 7cec308a2c
commit 8b60d797e1
6 changed files with 32 additions and 21 deletions
Binary file not shown.
Binary file not shown.
@@ -202,6 +202,7 @@ def completion(
     - If 'mock_response' is provided, a mock completion response is returned for testing or debugging.
     """
     ######### unpacking kwargs #####################
+    args = locals()
     return_async = kwargs.get('return_async', False)
     mock_response = kwargs.get('mock_response', None)
     api_key = kwargs.get('api_key', None)
@@ -216,9 +217,8 @@ def completion(
     metadata = kwargs.get('metadata', None)
     fallbacks = kwargs.get('fallbacks', [])
     ######## end of unpacking kwargs ###########
-    args = locals()
     openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "metadata"]
-    litellm_params = ["caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks"]
+    litellm_params = ["acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks"]
     default_params = openai_params + litellm_params
     non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
     if mock_response:
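
For context, the block above sorts every kwarg into OpenAI-style params, litellm control params (now including "acompletion"), or provider-specific extras that are passed through untouched. A minimal standalone sketch of that split, with shortened illustrative lists rather than litellm's full ones:

# Illustrative sketch of the kwargs split above (abbreviated lists, not litellm's full ones).
openai_params = ["temperature", "top_p", "stream", "stop", "max_tokens"]
litellm_params = ["acompletion", "mock_response", "api_key", "api_base", "fallbacks", "metadata"]
default_params = openai_params + litellm_params

def split_kwargs(**kwargs):
    # anything that is neither an OpenAI param nor a litellm control param
    # is treated as provider-specific and forwarded as-is
    return {k: v for k, v in kwargs.items() if k not in default_params}

print(split_kwargs(temperature=0.2, fallbacks=["gpt-3.5-turbo"], top_k=40))  # -> {'top_k': 40}
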
@@ -797,7 +797,7 @@ def completion(
             logging_obj=logging
         )
         # fake palm streaming
-        if stream == True:
+        if "stream" in optional_params and optional_params["stream"] == True:
             # fake streaming for palm
             resp_string = model_response["choices"][0]["message"]["content"]
             response = CustomStreamWrapper(
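
Note on the hunk above: the streaming check no longer reads the raw `stream` keyword; since get_optional_params (see the utils.py hunks further down) now maps "stream" into optional_params for PaLM, the branch reads the flag back out of that dict. A hedged equivalent one-liner:

# equivalent form of the guard introduced above
should_stream = optional_params.get("stream", False) == True
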
@@ -836,7 +836,6 @@ def completion(
             if k not in optional_params:
                 optional_params[k] = v
 
-        print(f"optional_params: {optional_params}")
         ## LOGGING
         logging.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params})
 
@@ -979,7 +978,7 @@ def completion(
             logging_obj=logging
         )
 
-        if stream==True: ## [BETA]
+        if "stream" in optional_params and optional_params["stream"]==True: ## [BETA]
             # sagemaker does not support streaming as of now so we're faking streaming:
             # https://discuss.huggingface.co/t/streaming-output-text-when-deploying-on-sagemaker/39611
             # "SageMaker is currently not supporting streaming responses."
@@ -1009,7 +1008,7 @@ def completion(
         )
 
 
-        if stream == True:
+        if "stream" in optional_params and optional_params["stream"] == True:
             # don't try to access stream object,
             response = CustomStreamWrapper(
                 iter(model_response), model, custom_llm_provider="bedrock", logging_obj=logging
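
All three branches touched above (PaLM, SageMaker, Bedrock) fake streaming the same way: the provider returns the complete text, and the finished response is wrapped in an iterator (CustomStreamWrapper in litellm) so callers can still consume it chunk by chunk. A minimal generic sketch of the idea, independent of litellm's wrapper class:

from typing import Iterator

def fake_stream(full_text: str, chunk_size: int = 20) -> Iterator[str]:
    # yield the already-completed text in small slices, imitating a streamed response
    for i in range(0, len(full_text), chunk_size):
        yield full_text[i:i + chunk_size]

for chunk in fake_stream("pretend this text came back from a non-streaming provider"):
    print(chunk, end="")
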
@@ -19,10 +19,8 @@ async def test_get_response():
         response = await acompletion(model="gpt-3.5-turbo", messages=messages)
     except Exception as e:
         pass
-    return response
 
-# response = asyncio.run(test_get_response())
+response = asyncio.run(test_get_response())
 # print(response)
 
-
 @pytest.mark.asyncio
@@ -47,7 +47,6 @@ def test_completion_claude():
         print(response.response_ms)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_claude()
 
 def test_completion_claude_max_tokens():
     try:
@@ -531,7 +530,7 @@ def test_completion_openai_with_more_optional_params():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_openai_with_more_optional_params()
+# test_completion_openai_with_more_optional_params()
 # def test_completion_openai_azure_with_functions():
 #     function1 = [
 #         {
@@ -916,7 +915,8 @@ def test_completion_bedrock_ai21():
 
 
 def test_completion_with_fallbacks():
-    fallbacks = ["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"]
+    print(f"RUNNING TEST COMPLETION WITH FALLBACKS - test_completion_with_fallbacks")
+    fallbacks = ["gpt-3.5-turbo", "gpt-3.5-turbo", "command-nightly"]
     try:
         response = completion(
             model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks
@@ -926,6 +926,7 @@ def test_completion_with_fallbacks():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
+test_completion_with_fallbacks()
 # def test_completion_with_fallbacks_multiple_keys():
 #     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
 #     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
@@ -1072,13 +1072,15 @@ def get_optional_params( # use the openai defaults
         optional_params["stop"] = stop #TG AI expects a list, example ["\n\n\n\n","<|endoftext|>"]
     elif custom_llm_provider == "palm":
         ## check if unsupported param passed in
-        supported_params = ["temperature", "top_p"]
+        supported_params = ["temperature", "top_p", "stream"]
         _check_valid_arg(supported_params=supported_params)
 
         if temperature:
             optional_params["temperature"] = temperature
         if top_p:
             optional_params["top_p"] = top_p
+        if stream:
+            optional_params["stream"] = stream
     elif (
         custom_llm_provider == "vertex_ai"
     ):
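
The utils.py hunks from here on all apply one pattern in get_optional_params: each provider branch lists the OpenAI-style params it supports (now including "stream"), rejects anything else, and copies the supported values into optional_params. A hypothetical standalone version of that pattern follows; check_supported here is an illustration, not litellm's internal _check_valid_arg:

SUPPORTED = {
    # illustrative subset of the per-provider lists in the diff
    "palm": ["temperature", "top_p", "stream"],
    "bedrock-ai21": ["max_tokens", "temperature", "stop", "top_p", "stream"],
}

def check_supported(provider: str, passed: dict) -> None:
    unsupported = [k for k in passed if k not in SUPPORTED[provider]]
    if unsupported:
        raise ValueError(f"{provider} does not support: {unsupported}")

def map_optional_params(provider: str, **passed) -> dict:
    # drop params the caller left unset, validate the rest, then pass them through
    passed = {k: v for k, v in passed.items() if v is not None}
    check_supported(provider, passed)
    return passed

print(map_optional_params("palm", temperature=0.1, stream=True))  # -> {'temperature': 0.1, 'stream': True}
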
@@ -1104,7 +1106,7 @@ def get_optional_params( # use the openai defaults
             return_full_text: If True, input text will be part of the output generated text. If specified, it must be boolean. The default value for it is False.
             """
             ## check if unsupported param passed in
-            supported_params = ["temperature", "max_tokens"]
+            supported_params = ["temperature", "max_tokens", "stream"]
             _check_valid_arg(supported_params=supported_params)
 
             if max_tokens:
@@ -1113,13 +1115,15 @@ def get_optional_params( # use the openai defaults
                 optional_params["temperature"] = temperature
             if top_p:
                 optional_params["top_p"] = top_p
+            if stream:
+                optional_params["stream"] = stream
         else:
             ## check if unsupported param passed in
             supported_params = []
             _check_valid_arg(supported_params=supported_params)
     elif custom_llm_provider == "bedrock":
         if "ai21" in model:
-            supported_params = ["max_tokens", "temperature", "stop", "top_p"]
+            supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
             _check_valid_arg(supported_params=supported_params)
             # params "maxTokens":200,"temperature":0,"topP":250,"stop_sequences":[],
             # https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra
@@ -1131,8 +1135,10 @@ def get_optional_params( # use the openai defaults
                 optional_params["stop_sequences"] = stop
             if top_p:
                 optional_params["topP"] = top_p
+            if stream:
+                optional_params["stream"] = stream
         elif "anthropic" in model:
-            supported_params = ["max_tokens", "temperature", "stop", "top_p"]
+            supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
             _check_valid_arg(supported_params=supported_params)
             # anthropic params on bedrock
             # \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}"
@@ -1146,8 +1152,10 @@ def get_optional_params( # use the openai defaults
                 optional_params["top_p"] = top_p
             if stop:
                 optional_params["stop_sequences"] = stop
+            if stream:
+                optional_params["stream"] = stream
         elif "amazon" in model: # amazon titan llms
-            supported_params = ["max_tokens", "temperature", "stop", "top_p"]
+            supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
             _check_valid_arg(supported_params=supported_params)
             # see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-large
             if max_tokens:
@@ -1158,7 +1166,8 @@ def get_optional_params( # use the openai defaults
                 optional_params["stopSequences"] = stop
             if top_p:
                 optional_params["topP"] = top_p
+            if stream:
+                optional_params["stream"] = stream
     elif model in litellm.aleph_alpha_models:
         supported_params = ["max_tokens", "stream", "top_p", "temperature", "presence_penalty", "frequency_penalty", "n", "stop"]
         _check_valid_arg(supported_params=supported_params)
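
For the Bedrock families above, the mapping also renames keys to what each model expects (topP, stopSequences, stop_sequences, and so on), while the new "stream" entry keeps its own name because it is read back in main.py's Bedrock branch (earlier hunk) to fake streaming rather than being sent as-is to the model. A hedged sketch of one such rename table; the maxTokenCount key is an assumption for illustration, since this diff only shows the topP and stopSequences renames for Titan:

# Illustrative OpenAI-style -> Amazon Titan-style key renames, mirroring the diff's comments.
TITAN_KEYS = {
    "max_tokens": "maxTokenCount",   # assumed name, not shown in this diff
    "temperature": "temperature",
    "top_p": "topP",
    "stop": "stopSequences",
}

def to_titan_params(openai_style: dict) -> dict:
    return {TITAN_KEYS[k]: v for k, v in openai_style.items() if k in TITAN_KEYS}

print(to_titan_params({"temperature": 0.2, "top_p": 0.9, "stop": ["\n\n"]}))
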
@@ -3431,13 +3440,15 @@ def completion_with_split_tests(models={}, messages=[], use_client=False, overri
     return litellm.completion(model=selected_llm, messages=messages, use_client=use_client, **kwargs)
 
 def completion_with_fallbacks(**kwargs):
+    print(f"kwargs inside completion_with_fallbacks: {kwargs}")
+    nested_kwargs = kwargs.pop("kwargs")
     response = None
     rate_limited_models = set()
     model_expiration_times = {}
     start_time = time.time()
     original_model = kwargs["model"]
-    fallbacks = [kwargs["model"]] + kwargs["fallbacks"]
-    del kwargs["fallbacks"] # remove fallbacks so it's not recursive
+    fallbacks = [kwargs["model"]] + nested_kwargs["fallbacks"]
+    del nested_kwargs["fallbacks"] # remove fallbacks so it's not recursive
 
     while response == None and time.time() - start_time < 45:
         for model in fallbacks:
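
The change above means completion_with_fallbacks now expects the caller to pass the litellm-specific params nested under a "kwargs" key; it pops that dict back out, takes the fallback list from it, and re-merges the rest on every retry. A simplified sketch of that flow, where completion_fn stands in for litellm.completion:

import time

def completion_with_fallbacks_sketch(completion_fn, **kwargs):
    # litellm-specific params arrive nested under "kwargs"; merge them back in per attempt
    nested_kwargs = kwargs.pop("kwargs", {})
    fallbacks = [kwargs.pop("model")] + nested_kwargs.pop("fallbacks", [])

    start_time = time.time()
    while time.time() - start_time < 45:          # overall retry budget, as in the diff
        for model in fallbacks:
            try:
                merged = {**kwargs, **nested_kwargs}  # openai + litellm params at one level
                response = completion_fn(model=model, **merged)
                if response is not None:
                    return response
            except Exception:
                continue                          # move on to the next fallback model
    return None
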
@@ -3466,8 +3477,10 @@ def completion_with_fallbacks(**kwargs):
                 if kwargs.get("model"):
                     del kwargs["model"]
 
+                print(f"trying to make completion call with model: {model}")
+                kwargs = {**kwargs, **nested_kwargs} # combine the openai + litellm params at the same level
                 response = litellm.completion(**kwargs, model=model)
+                print(f"response: {response}")
                 if response != None:
                     return response
 
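
End to end, the new path is exercised the way the fallback test above does it: a failing primary model plus a fallbacks list that completion() retries in order. A hedged usage sketch (messages is defined inline here; the test file defines it elsewhere):

from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = completion(
    model="bad-model",
    messages=messages,
    force_timeout=120,
    fallbacks=["gpt-3.5-turbo", "gpt-3.5-turbo", "command-nightly"],
)
print(response)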