Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 18:24:20 +00:00
fix(utils.py): add param mapping for perplexity, anyscale, deepinfra
This commit is contained in:
  parent: e7bb4a0cbd
  commit: 10fe16c965

5 changed files with 141 additions and 83 deletions
@@ -144,7 +144,9 @@ for key, value in model_cost.items():
 # known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
 openai_compatible_endpoints: List = [
-    "api.perplexity.ai"
+    "api.perplexity.ai",
+    "api.endpoints.anyscale.com/v1",
+    "api.deepinfra.com/v1/openai"
 ]
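The entries in this list are matched as substrings of a caller-supplied api_base (see the get_llm_provider hunk at the end of this diff), so scheme prefixes and trailing paths don't get in the way. A minimal sketch of that membership test, assuming the list above (the standalone helper name is illustrative):

from typing import Optional

openai_compatible_endpoints = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
]

def known_endpoint(api_base: str) -> Optional[str]:
    # substring match: "https://api.perplexity.ai" and bare "api.perplexity.ai" both hit
    for endpoint in openai_compatible_endpoints:
        if endpoint in api_base:
            return endpoint
    return None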
@@ -504,13 +504,16 @@ def completion(
     elif (
         model in litellm.open_ai_chat_completion_models
         or custom_llm_provider == "custom_openai"
+        or custom_llm_provider == "deepinfra"
+        or custom_llm_provider == "perplexity"
+        or custom_llm_provider == "anyscale"
         or custom_llm_provider == "openai"
         or "ft:gpt-3.5-turbo" in model # finetune gpt-3.5-turbo
     ): # allow user to make an openai call with a custom base
         # note: if a user sets a custom base - we should ensure this works
         # allow for the setting of dynamic and stateful api-bases
         api_base = (
-            api_base
+            api_base # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api base from there
             or litellm.api_base
             or get_secret("OPENAI_API_BASE")
             or "https://api.openai.com/v1"
@@ -522,7 +525,7 @@ def completion(
         )
         # set API KEY
         api_key = (
-            api_key or
+            api_key or # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
+            dynamic_api_key or # allows us to read env variables for compatible openai api's like perplexity
             litellm.api_key or
             litellm.openai_key or
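Both the api_base and api_key resolutions lean on Python's short-circuiting `or`: the first truthy value wins, so an explicit argument beats the new dynamic_api_key, which beats the module-level setting, which beats environment variables and the hard-coded default. A reduced sketch of the same precedence chain (the standalone function is illustrative, not litellm's API):

import os
from typing import Optional

def resolve_api_base(arg_api_base: Optional[str] = None, module_api_base: Optional[str] = None) -> str:
    # first truthy value wins; None and "" both fall through to the next candidate
    return (
        arg_api_base
        or module_api_base
        or os.getenv("OPENAI_API_BASE")
        or "https://api.openai.com/v1"
    )

print(resolve_api_base())  # -> "https://api.openai.com/v1" unless overridden upstream

One caveat of this pattern: falsy-but-meaningful values such as an empty string are skipped, which is the intended behavior here.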
@@ -164,7 +164,9 @@ def test_completion_gpt4_vision():

 def test_completion_perplexity_api():
     try:
-        # litellm.set_verbose=True
+        litellm.set_verbose=True
+        litellm.num_retries = 0
+        litellm.drop_params = True
         messages=[{
             "role": "system",
             "content": "You're a good bot"
@@ -178,12 +180,12 @@ def test_completion_perplexity_api():
         response = completion(
             model="mistral-7b-instruct",
             messages=messages,
-            api_base="https://api.perplexity.ai")
+            api_base="https://api.perplexity.ai", stop="Hello")
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-# test_completion_perplexity_api()
+test_completion_perplexity_api()

 def test_completion_perplexity_api_2():
     try:
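The re-enabled test exercises the new drop-params path: stop is not in Perplexity's whitelist (see the get_optional_params hunk further down), so with litellm.drop_params = True the call should succeed with the parameter silently removed rather than raising. A hedged usage sketch of the same call outside pytest (requires PERPLEXITYAI_API_KEY in the environment; model availability on Perplexity's side is an assumption):

import litellm

litellm.drop_params = True  # drop params the target provider doesn't support
response = litellm.completion(
    model="mistral-7b-instruct",
    messages=[{"role": "user", "content": "Hello, world"}],
    api_base="https://api.perplexity.ai",
    stop="Hello",  # unsupported by perplexity; dropped instead of raising
)
print(response)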
@@ -56,87 +56,88 @@ load_dotenv()
 # test_openai_only()

-def test_multiple_deployments():
-    import concurrent, time
-    # litellm.set_verbose=True
-    futures = {}
-    model_list = [{ # list of model deployments
-        "model_name": "gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-v-2",
-            "api_key": os.getenv("AZURE_API_KEY"),
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE")
-        },
-        "tpm": 240000,
-        "rpm": 1800
-    }, {
-        "model_name": "gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-functioncalling",
-            "api_key": os.getenv("AZURE_API_KEY"),
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE")
-        },
-        "tpm": 240000,
-        "rpm": 1800
-    }, {
-        "model_name": "gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "gpt-3.5-turbo",
-            "api_key": os.getenv("OPENAI_API_KEY"),
-        },
-        "tpm": 1000000,
-        "rpm": 9000
-    }]
+# def test_multiple_deployments():
+#     import concurrent, time
+#     # litellm.set_verbose=True
+#     futures = {}
+#     model_list = [{ # list of model deployments
+#         "model_name": "gpt-3.5-turbo", # openai model name
+#         "litellm_params": { # params for litellm completion/embedding call
+#             "model": "azure/chatgpt-v-2",
+#             "api_key": os.getenv("AZURE_API_KEY"),
+#             "api_version": os.getenv("AZURE_API_VERSION"),
+#             "api_base": os.getenv("AZURE_API_BASE")
+#         },
+#         "tpm": 240000,
+#         "rpm": 1800
+#     }, {
+#         "model_name": "gpt-3.5-turbo", # openai model name
+#         "litellm_params": { # params for litellm completion/embedding call
+#             "model": "azure/chatgpt-functioncalling",
+#             "api_key": os.getenv("AZURE_API_KEY"),
+#             "api_version": os.getenv("AZURE_API_VERSION"),
+#             "api_base": os.getenv("AZURE_API_BASE")
+#         },
+#         "tpm": 240000,
+#         "rpm": 1800
+#     }, {
+#         "model_name": "gpt-3.5-turbo", # openai model name
+#         "litellm_params": { # params for litellm completion/embedding call
+#             "model": "gpt-3.5-turbo",
+#             "api_key": os.getenv("OPENAI_API_KEY"),
+#         },
+#         "tpm": 1000000,
+#         "rpm": 9000
+#     }]

-    router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT")), routing_strategy="latency-based-routing") # type: ignore
+# router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT")), routing_strategy="latency-based-routing") # type: ignore
+# # router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT"))) # type: ignore

-    results = []
-    with ThreadPoolExecutor(max_workers=10) as executor:
-        kwargs = {
-            "model": "gpt-3.5-turbo",
-            "messages": [{"role": "user", "content": """Context:
+# results = []
+# with ThreadPoolExecutor(max_workers=100) as executor:
+#     kwargs = {
+#         "model": "gpt-3.5-turbo",
+#         "messages": [{"role": "user", "content": """Context:

-In the historical era of Ancient Greece, a multitude of significant individuals lived, contributing immensely to various disciplines like science, politics, philosophy, and literature. For instance, Socrates, a renowned philosopher, primarily focused on ethics. His notable method, the Socratic Method, involved acknowledging one's own ignorance to stimulate critical thinking and illuminate ideas. His student, Plato, another prominent figure, founded the Academy in Athens. He proposed theories on justice, beauty, and equality, and also introduced the theory of forms, which is pivotal to understanding his philosophical insights. Another student of Socrates, Xenophon, distinguished himself more in the domain of history and military affairs.
+# In the historical era of Ancient Greece, a multitude of significant individuals lived, contributing immensely to various disciplines like science, politics, philosophy, and literature. For instance, Socrates, a renowned philosopher, primarily focused on ethics. His notable method, the Socratic Method, involved acknowledging one's own ignorance to stimulate critical thinking and illuminate ideas. His student, Plato, another prominent figure, founded the Academy in Athens. He proposed theories on justice, beauty, and equality, and also introduced the theory of forms, which is pivotal to understanding his philosophical insights. Another student of Socrates, Xenophon, distinguished himself more in the domain of history and military affairs.

-Aristotle, who studied under Plato, led an equally remarkable life. His extensive works have been influential across various domains, including science, logic, metaphysics, ethics, and politics. Perhaps most notably, a substantial portion of the Western intellectual tradition traces back to his writings. He later tutored Alexander the Great who went on to create one of the most vast empires in the world.
+# Aristotle, who studied under Plato, led an equally remarkable life. His extensive works have been influential across various domains, including science, logic, metaphysics, ethics, and politics. Perhaps most notably, a substantial portion of the Western intellectual tradition traces back to his writings. He later tutored Alexander the Great who went on to create one of the most vast empires in the world.

-In the domain of mathematics, Pythagoras and Euclid made significant contributions. Pythagoras is best known for the Pythagorean theorem, a fundamental principle in geometry, while Euclid, often regarded as the father of geometry, wrote "The Elements", a collection of definitions, axioms, theorems, and proofs.
+# In the domain of mathematics, Pythagoras and Euclid made significant contributions. Pythagoras is best known for the Pythagorean theorem, a fundamental principle in geometry, while Euclid, often regarded as the father of geometry, wrote "The Elements", a collection of definitions, axioms, theorems, and proofs.

-Apart from these luminaries, the period also saw a number of influential political figures. Pericles, a prominent and influential Greek statesman, orator, and general of Athens during the Golden Age, specifically between the Persian and Peloponnesian wars, played a significant role in developing the Athenian democracy.
+# Apart from these luminaries, the period also saw a number of influential political figures. Pericles, a prominent and influential Greek statesman, orator, and general of Athens during the Golden Age, specifically between the Persian and Peloponnesian wars, played a significant role in developing the Athenian democracy.

-The Ancient Greek era also witnessed extraordinary advancements in arts and literature. Homer, credited with the creation of the epic poems 'The Iliad' and 'The Odyssey,' is considered one of the greatest poets in history. The tragedies of Sophocles, Aeschylus, and Euripides left an indelible mark on the field of drama, and the comedies of Aristophanes remain influential even today.
+# The Ancient Greek era also witnessed extraordinary advancements in arts and literature. Homer, credited with the creation of the epic poems 'The Iliad' and 'The Odyssey,' is considered one of the greatest poets in history. The tragedies of Sophocles, Aeschylus, and Euripides left an indelible mark on the field of drama, and the comedies of Aristophanes remain influential even today.

----
-Question:
+# ---
+# Question:

-Who among the mentioned figures from Ancient Greece contributed to the domain of mathematics and what are their significant contributions?"""}],
-        }
+# Who among the mentioned figures from Ancient Greece contributed to the domain of mathematics and what are their significant contributions?"""}],
+#     }

-        start_time = time.time()
-        for _ in range(1000):
-            future = executor.submit(router.completion, **kwargs)
-            futures[future] = future
+#     start_time = time.time()
+#     for _ in range(1000):
+#         future = executor.submit(router.completion, **kwargs)
+#         futures[future] = future

-        # Retrieve the results from the futures
-        while futures:
-            done, not_done = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
-            for future in done:
-                try:
-                    result = future.result()
-                    results.append(result)
-                    del futures[future]
-                except Exception as e:
-                    print(f"Exception: {e}; traceback: {traceback.format_exc()}")
-                    del futures[future] # remove the done future
+#     # Retrieve the results from the futures
+#     while futures:
+#         done, not_done = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
+#         for future in done:
+#             try:
+#                 result = future.result()
+#                 results.append(result)
+#                 del futures[future]
+#             except Exception as e:
+#                 print(f"Exception: {e}; traceback: {traceback.format_exc()}")
+#                 del futures[future] # remove the done future

-    end_time = time.time()
-    print(f"ELAPSED TIME: {end_time-start_time}")
-    # Check results
+#     end_time = time.time()
+#     print(f"ELAPSED TIME: {end_time-start_time}")
+#     # Check results

-test_multiple_deployments()
+# test_multiple_deployments()
 ### FUNCTION CALLING

 def test_function_calling():
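Although the router load test is commented out by this commit, the pattern it used is worth keeping: submit many completion calls to a thread pool, then drain them with concurrent.futures.wait as they finish. A self-contained sketch of that drain loop (the worker here is a stand-in for router.completion, which needs live credentials):

import concurrent.futures
import time
from concurrent.futures import ThreadPoolExecutor

def fake_completion(i: int) -> str:
    # stand-in for executor.submit(router.completion, **kwargs)
    time.sleep(0.01)
    return f"response {i}"

results = []
futures = {}
start_time = time.time()
with ThreadPoolExecutor(max_workers=10) as executor:
    for i in range(100):
        future = executor.submit(fake_completion, i)
        futures[future] = future
    # drain futures as they complete instead of waiting for all of them
    while futures:
        done, _ = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
        for future in done:
            try:
                results.append(future.result())
            finally:
                del futures[future]  # remove the done future
print(f"ELAPSED TIME: {time.time() - start_time}")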
@@ -1854,7 +1854,7 @@ def get_optional_params( # use the openai defaults
         raise UnsupportedParamsError(status_code=500, message=f"Function calling is not supported by {custom_llm_provider}. To add it to the prompt, set `litellm.add_function_to_prompt = True`.")

     def _check_valid_arg(supported_params):
-        print_verbose(f"\nLiteLLM completion() model= {model}")
+        print_verbose(f"\nLiteLLM completion() model= {model}; provider = {custom_llm_provider}")
         print_verbose(f"\nLiteLLM: Params passed to completion() {passed_params}")
         print_verbose(f"\nLiteLLM: Non-Default params passed to completion() {non_default_params}")
         unsupported_params = {}
@@ -1867,7 +1867,6 @@ def get_optional_params( # use the openai defaults
                 unsupported_params[k] = non_default_params[k]
         if unsupported_params and not litellm.drop_params:
             raise UnsupportedParamsError(status_code=500, message=f"{custom_llm_provider} does not support parameters: {unsupported_params}. To drop these, set `litellm.drop_params=True`.")
-
     ## raise exception if provider doesn't support passed in param
     if custom_llm_provider == "anthropic":
         ## check if unsupported param passed in
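So _check_valid_arg collects whatever the caller passed that isn't on the provider's whitelist and raises unless litellm.drop_params is set. A simplified, self-contained sketch of that contract (the free-standing function and error class are illustrative, not litellm's exact internals, which close over the surrounding scope):

class UnsupportedParamsError(Exception):
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        super().__init__(message)

def check_valid_args(non_default_params: dict, supported_params: list, drop_params: bool) -> dict:
    unsupported = {k: v for k, v in non_default_params.items() if k not in supported_params}
    if unsupported and not drop_params:
        raise UnsupportedParamsError(500, f"unsupported parameters: {unsupported}; set litellm.drop_params=True to drop them")
    # with drop_params=True, unsupported keys are silently omitted
    return {k: v for k, v in non_default_params.items() if k in supported_params}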
@@ -2158,7 +2157,7 @@ def get_optional_params( # use the openai defaults
             optional_params["presence_penalty"] = presence_penalty
         if stop is not None:
             optional_params["stop_sequences"] = stop
-    elif model in litellm.aleph_alpha_models:
+    elif custom_llm_provider == "aleph_alpha":
         supported_params = ["max_tokens", "stream", "top_p", "temperature", "presence_penalty", "frequency_penalty", "n", "stop"]
         _check_valid_arg(supported_params=supported_params)
         if max_tokens is not None:
@@ -2193,7 +2192,7 @@ def get_optional_params( # use the openai defaults
             optional_params["repeat_penalty"] = frequency_penalty
         if stop is not None:
             optional_params["stop_sequences"] = stop
-    elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud":
+    elif custom_llm_provider == "nlp_cloud":
         supported_params = ["max_tokens", "stream", "temperature", "top_p", "presence_penalty", "frequency_penalty", "n", "stop"]
         _check_valid_arg(supported_params=supported_params)

@@ -2213,7 +2212,7 @@ def get_optional_params( # use the openai defaults
             optional_params["num_return_sequences"] = n
         if stop is not None:
             optional_params["stop_sequences"] = stop
-    elif model in litellm.petals_models or custom_llm_provider == "petals":
+    elif custom_llm_provider == "petals":
         supported_params = ["max_tokens", "temperature", "top_p", "stream"]
         _check_valid_arg(supported_params=supported_params)
         # max_new_tokens=1,temperature=0.9, top_p=0.6
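The three hunks above are one mechanical change repeated: dispatch on custom_llm_provider, which get_llm_provider resolves once up front, rather than re-testing membership in static per-provider model lists. A model name the lists don't know about then still routes correctly when its provider is given. A toy before/after of the dispatch shape (model names are placeholders):

aleph_alpha_models = ["luminous-base", "luminous-extended"]  # static list, easily stale

def old_dispatch(model: str) -> bool:
    return model in aleph_alpha_models  # misses any model not in the list

def new_dispatch(custom_llm_provider: str) -> bool:
    return custom_llm_provider == "aleph_alpha"  # works for any model on that provider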
@@ -2228,11 +2227,59 @@ def get_optional_params( # use the openai defaults
+    elif custom_llm_provider == "deepinfra":
+        supported_params = ["temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user"]
+        _check_valid_arg(supported_params=supported_params)
+        if temperature is not None:
+            if temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1": # this model does not support temperature == 0
+                temperature = 0.0001 # close to 0
+            optional_params["temperature"] = temperature
+        if top_p:
+            optional_params["top_p"] = top_p
+        if n:
+            optional_params["n"] = n
+        if stream:
+            optional_params["stream"] = stream
+        if stop:
+            optional_params["stop"] = stop
+        if max_tokens:
+            optional_params["max_tokens"] = max_tokens
+        if presence_penalty:
+            optional_params["presence_penalty"] = presence_penalty
+        if frequency_penalty:
+            optional_params["frequency_penalty"] = frequency_penalty
+        if logit_bias:
+            optional_params["logit_bias"] = logit_bias
+        if user:
+            optional_params["user"] = user
+    elif custom_llm_provider == "perplexity":
+        supported_params = ["temperature", "top_p", "stream", "max_tokens", "presence_penalty", "frequency_penalty"]
+        _check_valid_arg(supported_params=supported_params)
+        if temperature is not None:
+            if temperature == 0 and model == "mistral-7b-instruct": # this model does not support temperature == 0
+                temperature = 0.0001 # close to 0
+            optional_params["temperature"] = temperature
+        if top_p:
+            optional_params["top_p"] = top_p
+        if stream:
+            optional_params["stream"] = stream
+        if max_tokens:
+            optional_params["max_tokens"] = max_tokens
+        if presence_penalty:
+            optional_params["presence_penalty"] = presence_penalty
+        if frequency_penalty:
+            optional_params["frequency_penalty"] = frequency_penalty
+    elif custom_llm_provider == "anyscale":
+        supported_params = ["temperature", "top_p", "stream", "max_tokens"]
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = non_default_params
+        if temperature is not None:
+            if temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1": # this model does not support temperature == 0
+                temperature = 0.0001 # close to 0
+            optional_params["temperature"] = temperature
+        if top_p:
+            optional_params["top_p"] = top_p
+        if stream:
+            optional_params["stream"] = stream
+        if max_tokens:
+            optional_params["max_tokens"] = max_tokens
     else: # assume passing in params for openai/azure openai
         supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice", "max_retries"]
         _check_valid_arg(supported_params=supported_params)
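Note that Perplexity's whitelist omits stop, which is exactly what the updated test exercises: with litellm.drop_params = True the parameter is dropped instead of raising. The block is one pattern repeated per provider and can be read as a table. A minimal, self-contained sketch of that table-driven reading (the helper and its signature are illustrative; litellm's real code also restricts the temperature == 0 workaround to specific Mistral deployments):

SUPPORTED_PARAMS = {
    "deepinfra": ["temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user"],
    "perplexity": ["temperature", "top_p", "stream", "max_tokens", "presence_penalty", "frequency_penalty"],
    "anyscale": ["temperature", "top_p", "stream", "max_tokens"],
}

def map_params(provider: str, drop_unsupported: bool = False, **params) -> dict:
    supported = SUPPORTED_PARAMS[provider]
    passed = {k: v for k, v in params.items() if v is not None}
    unsupported = set(passed) - set(supported)
    if unsupported and not drop_unsupported:
        raise ValueError(f"{provider} does not support parameters: {sorted(unsupported)}")
    mapped = {k: v for k, v in passed.items() if k in supported}
    if mapped.get("temperature") == 0:
        mapped["temperature"] = 0.0001  # some hosted Mistral models reject temperature == 0
    return mapped

print(map_params("perplexity", drop_unsupported=True, stop="Hello", temperature=0))
# -> {'temperature': 0.0001}; "stop" was dropped rather than raising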
@@ -2259,26 +2306,29 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
         # perplexity is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.perplexity.ai
         api_base = "https://api.perplexity.ai"
         dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
         custom_llm_provider = "custom_openai"
     elif custom_llm_provider == "anyscale":
         # anyscale is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
         api_base = "https://api.endpoints.anyscale.com/v1"
         dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
         custom_llm_provider = "custom_openai"
     elif custom_llm_provider == "deepinfra":
         # deepinfra is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.deepinfra.com/v1/openai
         api_base = "https://api.deepinfra.com/v1/openai"
         dynamic_api_key = os.getenv("DEEPINFRA_API_KEY")
         custom_llm_provider = "custom_openai"
     return model, custom_llm_provider, dynamic_api_key, api_base

     # check if api base is a known openai compatible endpoint
     if api_base:
         for endpoint in litellm.openai_compatible_endpoints:
             if endpoint in api_base:
-                custom_llm_provider = "custom_openai"
-                if endpoint == "api.perplexity.ai":
+                if endpoint == "api.perplexity.ai":
+                    custom_llm_provider = "perplexity"
                     dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
+                elif endpoint == "api.endpoints.anyscale.com/v1":
+                    custom_llm_provider = "anyscale"
+                    dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
+                elif endpoint == "api.deepinfra.com/v1/openai":
+                    custom_llm_provider = "deepinfra"
+                    dynamic_api_key = os.getenv("DEEPINFRA_API_KEY")
                 return model, custom_llm_provider, dynamic_api_key, api_base

     # check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, etc.)
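Taken together, the new detection path reads as a lookup from endpoint substring to a (provider, API-key env var) pair. A condensed sketch of that flow using the values from the diff (the standalone function illustrates the logic; it is not litellm's public API):

import os
from typing import Optional, Tuple

ENDPOINT_PROVIDERS = {
    "api.perplexity.ai": ("perplexity", "PERPLEXITYAI_API_KEY"),
    "api.endpoints.anyscale.com/v1": ("anyscale", "ANYSCALE_API_KEY"),
    "api.deepinfra.com/v1/openai": ("deepinfra", "DEEPINFRA_API_KEY"),
}

def detect_provider(api_base: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    # substring match mirrors the loop over litellm.openai_compatible_endpoints
    if api_base:
        for endpoint, (provider, env_var) in ENDPOINT_PROVIDERS.items():
            if endpoint in api_base:
                return provider, os.getenv(env_var)
    return None, None

print(detect_provider("https://api.perplexity.ai"))  # -> ("perplexity", <key or None>)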