diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index fda9a8790..6ebbe2ec9 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 4021680ec..a9133ee9d 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index ec14064b0..5b1871b28 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -202,6 +202,7 @@ def completion(
     - If 'mock_response' is provided, a mock completion response is returned for testing or debugging.
     """
     ######### unpacking kwargs #####################
+    args = locals()
     return_async = kwargs.get('return_async', False)
     mock_response = kwargs.get('mock_response', None)
     api_key = kwargs.get('api_key', None)
@@ -216,9 +217,8 @@ def completion(
     metadata = kwargs.get('metadata', None)
     fallbacks = kwargs.get('fallbacks', [])
     ######## end of unpacking kwargs ###########
-    args = locals()
     openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "metadata"]
-    litellm_params = ["caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks"]
+    litellm_params = ["acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks"]
     default_params = openai_params + litellm_params
     non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
     if mock_response:
@@ -797,7 +797,7 @@ def completion(
                 logging_obj=logging
             )
             # fake palm streaming
-            if stream == True:
+            if "stream" in optional_params and optional_params["stream"] == True:
                 # fake streaming for palm
                 resp_string = model_response["choices"][0]["message"]["content"]
                 response = CustomStreamWrapper(
@@ -836,7 +836,6 @@ def completion(
                 if k not in optional_params:
                     optional_params[k] = v

-            print(f"optional_params: {optional_params}")
             ## LOGGING
             logging.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params})

@@ -979,7 +978,7 @@ def completion(
                 logging_obj=logging
             )

-            if stream==True: ## [BETA]
+            if "stream" in optional_params and optional_params["stream"]==True: ## [BETA]
                 # sagemaker does not support streaming as of now so we're faking streaming:
                 # https://discuss.huggingface.co/t/streaming-output-text-when-deploying-on-sagemaker/39611
                 # "SageMaker is currently not supporting streaming responses."
@@ -1009,7 +1008,7 @@ def completion(
             )

-            if stream == True:
+            if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
                 response = CustomStreamWrapper(
                     iter(model_response), model, custom_llm_provider="bedrock", logging_obj=logging
diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py
index 4cb59a20c..80642ea01 100644
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -19,10 +19,8 @@ async def test_get_response():
         response = await acompletion(model="gpt-3.5-turbo", messages=messages)
     except Exception as e:
         pass
-    return response
-
-# response = asyncio.run(test_get_response())
+response = asyncio.run(test_get_response())
 # print(response)

 @pytest.mark.asyncio
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index aa6c858f2..ba9390f16 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -47,7 +47,6 @@ def test_completion_claude():
         print(response.response_ms)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_claude()

 def test_completion_claude_max_tokens():
     try:
@@ -531,7 +530,7 @@ def test_completion_openai_with_more_optional_params():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-test_completion_openai_with_more_optional_params()
+# test_completion_openai_with_more_optional_params()
 # def test_completion_openai_azure_with_functions():
 #     function1 = [
 #     {
@@ -916,7 +915,8 @@ def test_completion_bedrock_ai21():

 def test_completion_with_fallbacks():
-    fallbacks = ["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"]
+    print(f"RUNNING TEST COMPLETION WITH FALLBACKS - test_completion_with_fallbacks")
+    fallbacks = ["gpt-3.5-turbo", "gpt-3.5-turbo", "command-nightly"]
     try:
         response = completion(
             model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks
@@ -926,6 +926,7 @@ def test_completion_with_fallbacks():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

+test_completion_with_fallbacks()
 # def test_completion_with_fallbacks_multiple_keys():
 #     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
 #     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
diff --git a/litellm/utils.py b/litellm/utils.py
index 798ac457a..c03071362 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1072,13 +1072,15 @@ def get_optional_params( # use the openai defaults
         optional_params["stop"] = stop #TG AI expects a list, example ["\n\n\n\n","<|endoftext|>"]
     elif custom_llm_provider == "palm":
         ## check if unsupported param passed in
-        supported_params = ["temperature", "top_p"]
+        supported_params = ["temperature", "top_p", "stream"]
         _check_valid_arg(supported_params=supported_params)

         if temperature:
             optional_params["temperature"] = temperature
         if top_p:
             optional_params["top_p"] = top_p
+        if stream:
+            optional_params["stream"] = stream
     elif (
         custom_llm_provider == "vertex_ai"
     ):
@@ -1104,7 +1106,7 @@ def get_optional_params( # use the openai defaults
             return_full_text: If True, input text will be part of the output generated text. If specified, it must be boolean. The default value for it is False.
             """
             ## check if unsupported param passed in
-            supported_params = ["temperature", "max_tokens"]
+            supported_params = ["temperature", "max_tokens", "stream"]
             _check_valid_arg(supported_params=supported_params)

             if max_tokens:
@@ -1113,13 +1115,15 @@ def get_optional_params( # use the openai defaults
                 optional_params["temperature"] = temperature
             if top_p:
                 optional_params["top_p"] = top_p
+            if stream:
+                optional_params["stream"] = stream
         else:
             ## check if unsupported param passed in
             supported_params = []
             _check_valid_arg(supported_params=supported_params)
     elif custom_llm_provider == "bedrock":
         if "ai21" in model:
-            supported_params = ["max_tokens", "temperature", "stop", "top_p"]
+            supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
             _check_valid_arg(supported_params=supported_params)
             # params "maxTokens":200,"temperature":0,"topP":250,"stop_sequences":[],
             # https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra
@@ -1131,8 +1135,10 @@ def get_optional_params( # use the openai defaults
                 optional_params["stop_sequences"] = stop
             if top_p:
                 optional_params["topP"] = top_p
+            if stream:
+                optional_params["stream"] = stream
         elif "anthropic" in model:
-            supported_params = ["max_tokens", "temperature", "stop", "top_p"]
+            supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
             _check_valid_arg(supported_params=supported_params)
             # anthropic params on bedrock
             # \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}"
@@ -1146,8 +1152,10 @@ def get_optional_params( # use the openai defaults
                 optional_params["top_p"] = top_p
             if stop:
                 optional_params["stop_sequences"] = stop
+            if stream:
+                optional_params["stream"] = stream
         elif "amazon" in model: # amazon titan llms
-            supported_params = ["max_tokens", "temperature", "stop", "top_p"]
+            supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
             _check_valid_arg(supported_params=supported_params)
             # see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-large
             if max_tokens:
@@ -1158,7 +1166,8 @@ def get_optional_params( # use the openai defaults
                 optional_params["stopSequences"] = stop
             if top_p:
                 optional_params["topP"] = top_p
-
+            if stream:
+                optional_params["stream"] = stream
     elif model in litellm.aleph_alpha_models:
         supported_params = ["max_tokens", "stream", "top_p", "temperature", "presence_penalty", "frequency_penalty", "n", "stop"]
         _check_valid_arg(supported_params=supported_params)
@@ -3431,13 +3440,15 @@ def completion_with_split_tests(models={}, messages=[], use_client=False, overri
     return litellm.completion(model=selected_llm, messages=messages, use_client=use_client, **kwargs)

 def completion_with_fallbacks(**kwargs):
+    print(f"kwargs inside completion_with_fallbacks: {kwargs}")
+    nested_kwargs = kwargs.pop("kwargs")
     response = None
     rate_limited_models = set()
     model_expiration_times = {}
     start_time = time.time()
     original_model = kwargs["model"]
-    fallbacks = [kwargs["model"]] + kwargs["fallbacks"]
-    del kwargs["fallbacks"] # remove fallbacks so it's not recursive
+    fallbacks = [kwargs["model"]] + nested_kwargs["fallbacks"]
+    del nested_kwargs["fallbacks"] # remove fallbacks so it's not recursive

     while response == None and time.time() - start_time < 45:
         for model in fallbacks:
@@ -3466,8 +3477,10 @@ def completion_with_fallbacks(**kwargs):
                 if kwargs.get("model"):
                     del kwargs["model"]

+                print(f"trying to make completion call with model: {model}")
+                kwargs = {**kwargs, **nested_kwargs} # combine the openai + litellm params at the same level
                 response = litellm.completion(**kwargs, model=model)
-
+                print(f"response: {response}")
                 if response != None:
                     return response
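
For reviewers, a minimal sketch (not part of the diff) of how the fallbacks path above is exercised from the caller's side, mirroring `test_completion_with_fallbacks`; the model names are illustrative assumptions and provider API keys are assumed to be set in the environment.

```python
# Sketch only: mirrors test_completion_with_fallbacks from this diff.
# Assumes OPENAI_API_KEY / COHERE_API_KEY are exported; model names are illustrative.
from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# "bad-model" is expected to fail, so completion() hands off to
# completion_with_fallbacks(), which now pulls "fallbacks" out of the nested
# kwargs dict and retries each model (original model first) until one succeeds
# or the ~45s loop budget in completion_with_fallbacks runs out.
response = completion(
    model="bad-model",
    messages=messages,
    fallbacks=["gpt-3.5-turbo", "command-nightly"],
)
print(response["choices"][0]["message"]["content"])
```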
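Likewise, a hedged sketch of the streaming behaviour these changes enable: with `stream` now accepted by `get_optional_params` for palm/sagemaker/bedrock, `completion(..., stream=True)` returns a `CustomStreamWrapper` that fakes streaming by chunking the full response. The Bedrock model id below is an assumption, and AWS credentials are expected to be configured.

```python
# Sketch only: consuming the fake-streaming path enabled above.
# The model id is illustrative; AWS credentials for Bedrock are assumed.
from litellm import completion

messages = [{"role": "user", "content": "Write one line about rivers."}]

response = completion(
    model="anthropic.claude-instant-v1",  # assumed Bedrock model id
    custom_llm_provider="bedrock",
    messages=messages,
    stream=True,
)

# CustomStreamWrapper is iterable, so the (fake) stream is consumed like a real one.
for chunk in response:
    print(chunk)
```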