From 10fe16c965511d4bd97e3100bde54b3701e021be Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 22 Nov 2023 10:04:27 -0800 Subject: [PATCH] fix(utils.py): add param mapping for perplexity, anyscale, deepinfra n n --- litellm/__init__.py | 4 +- litellm/main.py | 7 +- litellm/tests/test_completion.py | 8 +- litellm/tests/test_router.py | 135 ++++++++++++++++--------------- litellm/utils.py | 70 +++++++++++++--- 5 files changed, 141 insertions(+), 83 deletions(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index c53f1e43f..fe6ceede7 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -144,7 +144,9 @@ for key, value in model_cost.items(): # known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary openai_compatible_endpoints: List = [ - "api.perplexity.ai" + "api.perplexity.ai", + "api.endpoints.anyscale.com/v1", + "api.deepinfra.com/v1/openai" ] diff --git a/litellm/main.py b/litellm/main.py index b9ed40fa0..820033e50 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -504,13 +504,16 @@ def completion( elif ( model in litellm.open_ai_chat_completion_models or custom_llm_provider == "custom_openai" + or custom_llm_provider == "deepinfra" + or custom_llm_provider == "perplexity" + or custom_llm_provider == "anyscale" or custom_llm_provider == "openai" or "ft:gpt-3.5-turbo" in model # finetune gpt-3.5-turbo ): # allow user to make an openai call with a custom base # note: if a user sets a custom base - we should ensure this works # allow for the setting of dynamic and stateful api-bases api_base = ( - api_base + api_base # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api base from there or litellm.api_base or get_secret("OPENAI_API_BASE") or "https://api.openai.com/v1" @@ -522,7 +525,7 @@ def completion( ) # set API KEY api_key = ( - api_key or + api_key or # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api 
key from there dynamic_api_key or # allows us to read env variables for compatible openai api's like perplexity litellm.api_key or litellm.openai_key or diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 6ef0c601c..73857e03f 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -164,7 +164,9 @@ def test_completion_gpt4_vision(): def test_completion_perplexity_api(): try: - # litellm.set_verbose=True + litellm.set_verbose=True + litellm.num_retries = 0 + litellm.drop_params = True messages=[{ "role": "system", "content": "You're a good bot" @@ -178,12 +180,12 @@ def test_completion_perplexity_api(): response = completion( model="mistral-7b-instruct", messages=messages, - api_base="https://api.perplexity.ai") + api_base="https://api.perplexity.ai", stop="Hello") print(response) except Exception as e: pytest.fail(f"Error occurred: {e}") -# test_completion_perplexity_api() +test_completion_perplexity_api() def test_completion_perplexity_api_2(): try: diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py index cc5600702..d4d04a05e 100644 --- a/litellm/tests/test_router.py +++ b/litellm/tests/test_router.py @@ -56,87 +56,88 @@ load_dotenv() # test_openai_only() -def test_multiple_deployments(): - import concurrent, time - # litellm.set_verbose=True - futures = {} - model_list = [{ # list of model deployments - "model_name": "gpt-3.5-turbo", # openai model name - "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-v-2", - "api_key": os.getenv("AZURE_API_KEY"), - "api_version": os.getenv("AZURE_API_VERSION"), - "api_base": os.getenv("AZURE_API_BASE") - }, - "tpm": 240000, - "rpm": 1800 - }, { - "model_name": "gpt-3.5-turbo", # openai model name - "litellm_params": { # params for litellm completion/embedding call - "model": "azure/chatgpt-functioncalling", - "api_key": os.getenv("AZURE_API_KEY"), - "api_version": os.getenv("AZURE_API_VERSION"), - 
"api_base": os.getenv("AZURE_API_BASE") - }, - "tpm": 240000, - "rpm": 1800 - }, { - "model_name": "gpt-3.5-turbo", # openai model name - "litellm_params": { # params for litellm completion/embedding call - "model": "gpt-3.5-turbo", - "api_key": os.getenv("OPENAI_API_KEY"), - }, - "tpm": 1000000, - "rpm": 9000 - }] +# def test_multiple_deployments(): +# import concurrent, time +# # litellm.set_verbose=True +# futures = {} +# model_list = [{ # list of model deployments +# "model_name": "gpt-3.5-turbo", # openai model name +# "litellm_params": { # params for litellm completion/embedding call +# "model": "azure/chatgpt-v-2", +# "api_key": os.getenv("AZURE_API_KEY"), +# "api_version": os.getenv("AZURE_API_VERSION"), +# "api_base": os.getenv("AZURE_API_BASE") +# }, +# "tpm": 240000, +# "rpm": 1800 +# }, { +# "model_name": "gpt-3.5-turbo", # openai model name +# "litellm_params": { # params for litellm completion/embedding call +# "model": "azure/chatgpt-functioncalling", +# "api_key": os.getenv("AZURE_API_KEY"), +# "api_version": os.getenv("AZURE_API_VERSION"), +# "api_base": os.getenv("AZURE_API_BASE") +# }, +# "tpm": 240000, +# "rpm": 1800 +# }, { +# "model_name": "gpt-3.5-turbo", # openai model name +# "litellm_params": { # params for litellm completion/embedding call +# "model": "gpt-3.5-turbo", +# "api_key": os.getenv("OPENAI_API_KEY"), +# }, +# "tpm": 1000000, +# "rpm": 9000 +# }] - router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT")), routing_strategy="latency-based-routing") # type: ignore +# router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT")), routing_strategy="latency-based-routing") # type: ignore +# # router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), 
redis_port=int(os.getenv("REDIS_PORT"))) # type: ignore - results = [] - with ThreadPoolExecutor(max_workers=10) as executor: - kwargs = { - "model": "gpt-3.5-turbo", - "messages": [{"role": "user", "content": """Context: +# results = [] +# with ThreadPoolExecutor(max_workers=100) as executor: +# kwargs = { +# "model": "gpt-3.5-turbo", +# "messages": [{"role": "user", "content": """Context: -In the historical era of Ancient Greece, a multitude of significant individuals lived, contributing immensely to various disciplines like science, politics, philosophy, and literature. For instance, Socrates, a renowned philosopher, primarily focused on ethics. His notable method, the Socratic Method, involved acknowledging one's own ignorance to stimulate critical thinking and illuminate ideas. His student, Plato, another prominent figure, founded the Academy in Athens. He proposed theories on justice, beauty, and equality, and also introduced the theory of forms, which is pivotal to understanding his philosophical insights. Another student of Socrates, Xenophon, distinguished himself more in the domain of history and military affairs. +# In the historical era of Ancient Greece, a multitude of significant individuals lived, contributing immensely to various disciplines like science, politics, philosophy, and literature. For instance, Socrates, a renowned philosopher, primarily focused on ethics. His notable method, the Socratic Method, involved acknowledging one's own ignorance to stimulate critical thinking and illuminate ideas. His student, Plato, another prominent figure, founded the Academy in Athens. He proposed theories on justice, beauty, and equality, and also introduced the theory of forms, which is pivotal to understanding his philosophical insights. Another student of Socrates, Xenophon, distinguished himself more in the domain of history and military affairs. -Aristotle, who studied under Plato, led an equally remarkable life. 
His extensive works have been influential across various domains, including science, logic, metaphysics, ethics, and politics. Perhaps most notably, a substantial portion of the Western intellectual tradition traces back to his writings. He later tutored Alexander the Great who went on to create one of the most vast empires in the world. +# Aristotle, who studied under Plato, led an equally remarkable life. His extensive works have been influential across various domains, including science, logic, metaphysics, ethics, and politics. Perhaps most notably, a substantial portion of the Western intellectual tradition traces back to his writings. He later tutored Alexander the Great who went on to create one of the most vast empires in the world. -In the domain of mathematics, Pythagoras and Euclid made significant contributions. Pythagoras is best known for the Pythagorean theorem, a fundamental principle in geometry, while Euclid, often regarded as the father of geometry, wrote "The Elements", a collection of definitions, axioms, theorems, and proofs. +# In the domain of mathematics, Pythagoras and Euclid made significant contributions. Pythagoras is best known for the Pythagorean theorem, a fundamental principle in geometry, while Euclid, often regarded as the father of geometry, wrote "The Elements", a collection of definitions, axioms, theorems, and proofs. -Apart from these luminaries, the period also saw a number of influential political figures. Pericles, a prominent and influential Greek statesman, orator, and general of Athens during the Golden Age, specifically between the Persian and Peloponnesian wars, played a significant role in developing the Athenian democracy. +# Apart from these luminaries, the period also saw a number of influential political figures. 
Pericles, a prominent and influential Greek statesman, orator, and general of Athens during the Golden Age, specifically between the Persian and Peloponnesian wars, played a significant role in developing the Athenian democracy. -The Ancient Greek era also witnessed extraordinary advancements in arts and literature. Homer, credited with the creation of the epic poems 'The Iliad' and 'The Odyssey,' is considered one of the greatest poets in history. The tragedies of Sophocles, Aeschylus, and Euripides left an indelible mark on the field of drama, and the comedies of Aristophanes remain influential even today. +# The Ancient Greek era also witnessed extraordinary advancements in arts and literature. Homer, credited with the creation of the epic poems 'The Iliad' and 'The Odyssey,' is considered one of the greatest poets in history. The tragedies of Sophocles, Aeschylus, and Euripides left an indelible mark on the field of drama, and the comedies of Aristophanes remain influential even today. 
---- -Question: +# --- +# Question: -Who among the mentioned figures from Ancient Greece contributed to the domain of mathematics and what are their significant contributions?"""}], - } +# Who among the mentioned figures from Ancient Greece contributed to the domain of mathematics and what are their significant contributions?"""}], +# } - start_time = time.time() - for _ in range(1000): - future = executor.submit(router.completion, **kwargs) - futures[future] = future +# start_time = time.time() +# for _ in range(1000): +# future = executor.submit(router.completion, **kwargs) +# futures[future] = future - # Retrieve the results from the futures - while futures: - done, not_done = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED) - for future in done: - try: - result = future.result() - results.append(result) - del futures[future] - except Exception as e: - print(f"Exception: {e}; traceback: {traceback.format_exc()}") - del futures[future] # remove the done future +# # Retrieve the results from the futures +# while futures: +# done, not_done = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED) +# for future in done: +# try: +# result = future.result() +# results.append(result) +# del futures[future] +# except Exception as e: +# print(f"Exception: {e}; traceback: {traceback.format_exc()}") +# del futures[future] # remove the done future - end_time = time.time() - print(f"ELAPSED TIME: {end_time-start_time}") - # Check results +# end_time = time.time() +# print(f"ELAPSED TIME: {end_time-start_time}") +# # Check results -test_multiple_deployments() +# test_multiple_deployments() ### FUNCTION CALLING def test_function_calling(): diff --git a/litellm/utils.py b/litellm/utils.py index 10601b653..9af58db33 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1854,7 +1854,7 @@ def get_optional_params( # use the openai defaults raise UnsupportedParamsError(status_code=500, message=f"Function calling is not 
supported by {custom_llm_provider}. To add it to the prompt, set `litellm.add_function_to_prompt = True`.") def _check_valid_arg(supported_params): - print_verbose(f"\nLiteLLM completion() model= {model}") + print_verbose(f"\nLiteLLM completion() model= {model}; provider = {custom_llm_provider}") print_verbose(f"\nLiteLLM: Params passed to completion() {passed_params}") print_verbose(f"\nLiteLLM: Non-Default params passed to completion() {non_default_params}") unsupported_params = {} @@ -1867,7 +1867,6 @@ def get_optional_params( # use the openai defaults unsupported_params[k] = non_default_params[k] if unsupported_params and not litellm.drop_params: raise UnsupportedParamsError(status_code=500, message=f"{custom_llm_provider} does not support parameters: {unsupported_params}. To drop these, set `litellm.drop_params=True`.") - ## raise exception if provider doesn't support passed in param if custom_llm_provider == "anthropic": ## check if unsupported param passed in @@ -2158,7 +2157,7 @@ def get_optional_params( # use the openai defaults optional_params["presence_penalty"] = presence_penalty if stop is not None: optional_params["stop_sequences"] = stop - elif model in litellm.aleph_alpha_models: + elif custom_llm_provider == "aleph_alpha": supported_params = ["max_tokens", "stream", "top_p", "temperature", "presence_penalty", "frequency_penalty", "n", "stop"] _check_valid_arg(supported_params=supported_params) if max_tokens is not None: @@ -2193,7 +2192,7 @@ def get_optional_params( # use the openai defaults optional_params["repeat_penalty"] = frequency_penalty if stop is not None: optional_params["stop_sequences"] = stop - elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud": + elif custom_llm_provider == "nlp_cloud": supported_params = ["max_tokens", "stream", "temperature", "top_p", "presence_penalty", "frequency_penalty", "n", "stop"] _check_valid_arg(supported_params=supported_params) @@ -2213,7 +2212,7 @@ def get_optional_params( # use 
the openai defaults optional_params["num_return_sequences"] = n if stop is not None: optional_params["stop_sequences"] = stop - elif model in litellm.petals_models or custom_llm_provider == "petals": + elif custom_llm_provider == "petals": supported_params = ["max_tokens", "temperature", "top_p", "stream"] _check_valid_arg(supported_params=supported_params) # max_new_tokens=1,temperature=0.9, top_p=0.6 @@ -2228,11 +2227,59 @@ def get_optional_params( # use the openai defaults elif custom_llm_provider == "deepinfra": supported_params = ["temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user"] _check_valid_arg(supported_params=supported_params) + if temperature is not None: + if temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1": # this model does not support temperature == 0 + temperature = 0.0001 # close to 0 + optional_params["temperature"] = temperature + if top_p: + optional_params["top_p"] = top_p + if n: + optional_params["n"] = n + if stream: + optional_params["stream"] = stream + if stop: + optional_params["stop"] = stop + if max_tokens: + optional_params["max_tokens"] = max_tokens + if presence_penalty: + optional_params["presence_penalty"] = presence_penalty + if frequency_penalty: + optional_params["frequency_penalty"] = frequency_penalty + if logit_bias: + optional_params["logit_bias"] = logit_bias + if user: + optional_params["user"] = user + elif custom_llm_provider == "perplexity": + supported_params = ["temperature", "top_p", "stream", "max_tokens", "presence_penalty", "frequency_penalty"] + _check_valid_arg(supported_params=supported_params) + if temperature is not None: + if temperature == 0 and model == "mistral-7b-instruct": # this model does not support temperature == 0 + temperature = 0.0001 # close to 0 + optional_params["temperature"] = temperature + if top_p: + optional_params["top_p"] = top_p + if stream: + optional_params["stream"] = stream + if max_tokens: + 
optional_params["max_tokens"] = max_tokens + if presence_penalty: + optional_params["presence_penalty"] = presence_penalty + if frequency_penalty: + optional_params["frequency_penalty"] = frequency_penalty + elif custom_llm_provider == "anyscale": + supported_params = ["temperature", "top_p", "stream", "max_tokens"] + _check_valid_arg(supported_params=supported_params) optional_params = non_default_params if temperature is not None: if temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1": # this model does not support temperature == 0 temperature = 0.0001 # close to 0 optional_params["temperature"] = temperature + if top_p: + optional_params["top_p"] = top_p + if stream: + optional_params["stream"] = stream + if max_tokens: + optional_params["max_tokens"] = max_tokens else: # assume passing in params for openai/azure openai supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice", "max_retries"] _check_valid_arg(supported_params=supported_params) @@ -2259,26 +2306,29 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_ # perplexity is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.perplexity.ai api_base = "https://api.perplexity.ai" dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY") - custom_llm_provider = "custom_openai" elif custom_llm_provider == "anyscale": # anyscale is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1 api_base = "https://api.endpoints.anyscale.com/v1" dynamic_api_key = os.getenv("ANYSCALE_API_KEY") - custom_llm_provider = "custom_openai" elif custom_llm_provider == "deepinfra": # deepinfra is openai compatible, we just need to set this to custom_openai and have the api_base be 
https://api.deepinfra.com/v1/openai api_base = "https://api.deepinfra.com/v1/openai" dynamic_api_key = os.getenv("DEEPINFRA_API_KEY") - custom_llm_provider = "custom_openai" return model, custom_llm_provider, dynamic_api_key, api_base # check if api base is a known openai compatible endpoint if api_base: for endpoint in litellm.openai_compatible_endpoints: if endpoint in api_base: - custom_llm_provider = "custom_openai" - if endpoint == "api.perplexity.ai": + if endpoint == "api.perplexity.ai": + custom_llm_provider = "perplexity" dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY") + elif endpoint == "api.endpoints.anyscale.com/v1": + custom_llm_provider = "anyscale" + dynamic_api_key = os.getenv("ANYSCALE_API_KEY") + elif endpoint == "api.deepinfra.com/v1/openai": + custom_llm_provider = "deepinfra" + dynamic_api_key = os.getenv("DEEPINFRA_API_KEY") return model, custom_llm_provider, dynamic_api_key, api_base # check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, etc.)