fix(utils.py): add param mapping for perplexity, anyscale, deepinfra

n

n
This commit is contained in:
Krrish Dholakia 2023-11-22 10:04:27 -08:00
parent e7bb4a0cbd
commit 10fe16c965
5 changed files with 141 additions and 83 deletions

View file

@ -144,7 +144,9 @@ for key, value in model_cost.items():
# known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
openai_compatible_endpoints: List = [
"api.perplexity.ai"
"api.perplexity.ai",
"api.endpoints.anyscale.com/v1",
"api.deepinfra.com/v1/openai"
]

View file

@ -504,13 +504,16 @@ def completion(
elif (
model in litellm.open_ai_chat_completion_models
or custom_llm_provider == "custom_openai"
or custom_llm_provider == "deepinfra"
or custom_llm_provider == "perplexity"
or custom_llm_provider == "anyscale"
or custom_llm_provider == "openai"
or "ft:gpt-3.5-turbo" in model # finetune gpt-3.5-turbo
): # allow user to make an openai call with a custom base
# note: if a user sets a custom base - we should ensure this works
# allow for the setting of dynamic and stateful api-bases
api_base = (
api_base
api_base # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api base from there
or litellm.api_base
or get_secret("OPENAI_API_BASE")
or "https://api.openai.com/v1"
@ -522,7 +525,7 @@ def completion(
)
# set API KEY
api_key = (
api_key or
api_key or # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
dynamic_api_key or # allows us to read env variables for compatible openai api's like perplexity
litellm.api_key or
litellm.openai_key or

View file

@ -164,7 +164,9 @@ def test_completion_gpt4_vision():
def test_completion_perplexity_api():
try:
# litellm.set_verbose=True
litellm.set_verbose=True
litellm.num_retries = 0
litellm.drop_params = True
messages=[{
"role": "system",
"content": "You're a good bot"
@ -178,12 +180,12 @@ def test_completion_perplexity_api():
response = completion(
model="mistral-7b-instruct",
messages=messages,
api_base="https://api.perplexity.ai")
api_base="https://api.perplexity.ai", stop="Hello")
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# test_completion_perplexity_api()
test_completion_perplexity_api()
def test_completion_perplexity_api_2():
try:

View file

@ -56,87 +56,88 @@ load_dotenv()
# test_openai_only()
def test_multiple_deployments():
import concurrent, time
# litellm.set_verbose=True
futures = {}
model_list = [{ # list of model deployments
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
},
"tpm": 240000,
"rpm": 1800
}, {
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
},
"tpm": 240000,
"rpm": 1800
}, {
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
"tpm": 1000000,
"rpm": 9000
}]
# def test_multiple_deployments():
# import concurrent, time
# # litellm.set_verbose=True
# futures = {}
# model_list = [{ # list of model deployments
# "model_name": "gpt-3.5-turbo", # openai model name
# "litellm_params": { # params for litellm completion/embedding call
# "model": "azure/chatgpt-v-2",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE")
# },
# "tpm": 240000,
# "rpm": 1800
# }, {
# "model_name": "gpt-3.5-turbo", # openai model name
# "litellm_params": { # params for litellm completion/embedding call
# "model": "azure/chatgpt-functioncalling",
# "api_key": os.getenv("AZURE_API_KEY"),
# "api_version": os.getenv("AZURE_API_VERSION"),
# "api_base": os.getenv("AZURE_API_BASE")
# },
# "tpm": 240000,
# "rpm": 1800
# }, {
# "model_name": "gpt-3.5-turbo", # openai model name
# "litellm_params": { # params for litellm completion/embedding call
# "model": "gpt-3.5-turbo",
# "api_key": os.getenv("OPENAI_API_KEY"),
# },
# "tpm": 1000000,
# "rpm": 9000
# }]
router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT")), routing_strategy="latency-based-routing") # type: ignore
# router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT")), routing_strategy="latency-based-routing") # type: ignore
# # router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT"))) # type: ignore
results = []
with ThreadPoolExecutor(max_workers=10) as executor:
kwargs = {
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": """Context:
# results = []
# with ThreadPoolExecutor(max_workers=100) as executor:
# kwargs = {
# "model": "gpt-3.5-turbo",
# "messages": [{"role": "user", "content": """Context:
In the historical era of Ancient Greece, a multitude of significant individuals lived, contributing immensely to various disciplines like science, politics, philosophy, and literature. For instance, Socrates, a renowned philosopher, primarily focused on ethics. His notable method, the Socratic Method, involved acknowledging one's own ignorance to stimulate critical thinking and illuminate ideas. His student, Plato, another prominent figure, founded the Academy in Athens. He proposed theories on justice, beauty, and equality, and also introduced the theory of forms, which is pivotal to understanding his philosophical insights. Another student of Socrates, Xenophon, distinguished himself more in the domain of history and military affairs.
# In the historical era of Ancient Greece, a multitude of significant individuals lived, contributing immensely to various disciplines like science, politics, philosophy, and literature. For instance, Socrates, a renowned philosopher, primarily focused on ethics. His notable method, the Socratic Method, involved acknowledging one's own ignorance to stimulate critical thinking and illuminate ideas. His student, Plato, another prominent figure, founded the Academy in Athens. He proposed theories on justice, beauty, and equality, and also introduced the theory of forms, which is pivotal to understanding his philosophical insights. Another student of Socrates, Xenophon, distinguished himself more in the domain of history and military affairs.
Aristotle, who studied under Plato, led an equally remarkable life. His extensive works have been influential across various domains, including science, logic, metaphysics, ethics, and politics. Perhaps most notably, a substantial portion of the Western intellectual tradition traces back to his writings. He later tutored Alexander the Great who went on to create one of the most vast empires in the world.
# Aristotle, who studied under Plato, led an equally remarkable life. His extensive works have been influential across various domains, including science, logic, metaphysics, ethics, and politics. Perhaps most notably, a substantial portion of the Western intellectual tradition traces back to his writings. He later tutored Alexander the Great who went on to create one of the most vast empires in the world.
In the domain of mathematics, Pythagoras and Euclid made significant contributions. Pythagoras is best known for the Pythagorean theorem, a fundamental principle in geometry, while Euclid, often regarded as the father of geometry, wrote "The Elements", a collection of definitions, axioms, theorems, and proofs.
# In the domain of mathematics, Pythagoras and Euclid made significant contributions. Pythagoras is best known for the Pythagorean theorem, a fundamental principle in geometry, while Euclid, often regarded as the father of geometry, wrote "The Elements", a collection of definitions, axioms, theorems, and proofs.
Apart from these luminaries, the period also saw a number of influential political figures. Pericles, a prominent and influential Greek statesman, orator, and general of Athens during the Golden Age, specifically between the Persian and Peloponnesian wars, played a significant role in developing the Athenian democracy.
# Apart from these luminaries, the period also saw a number of influential political figures. Pericles, a prominent and influential Greek statesman, orator, and general of Athens during the Golden Age, specifically between the Persian and Peloponnesian wars, played a significant role in developing the Athenian democracy.
The Ancient Greek era also witnessed extraordinary advancements in arts and literature. Homer, credited with the creation of the epic poems 'The Iliad' and 'The Odyssey,' is considered one of the greatest poets in history. The tragedies of Sophocles, Aeschylus, and Euripides left an indelible mark on the field of drama, and the comedies of Aristophanes remain influential even today.
# The Ancient Greek era also witnessed extraordinary advancements in arts and literature. Homer, credited with the creation of the epic poems 'The Iliad' and 'The Odyssey,' is considered one of the greatest poets in history. The tragedies of Sophocles, Aeschylus, and Euripides left an indelible mark on the field of drama, and the comedies of Aristophanes remain influential even today.
---
Question:
# ---
# Question:
Who among the mentioned figures from Ancient Greece contributed to the domain of mathematics and what are their significant contributions?"""}],
}
# Who among the mentioned figures from Ancient Greece contributed to the domain of mathematics and what are their significant contributions?"""}],
# }
start_time = time.time()
for _ in range(1000):
future = executor.submit(router.completion, **kwargs)
futures[future] = future
# start_time = time.time()
# for _ in range(1000):
# future = executor.submit(router.completion, **kwargs)
# futures[future] = future
# Retrieve the results from the futures
while futures:
done, not_done = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
for future in done:
try:
result = future.result()
results.append(result)
del futures[future]
except Exception as e:
print(f"Exception: {e}; traceback: {traceback.format_exc()}")
del futures[future] # remove the done future
# # Retrieve the results from the futures
# while futures:
# done, not_done = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
# for future in done:
# try:
# result = future.result()
# results.append(result)
# del futures[future]
# except Exception as e:
# print(f"Exception: {e}; traceback: {traceback.format_exc()}")
# del futures[future] # remove the done future
end_time = time.time()
print(f"ELAPSED TIME: {end_time-start_time}")
# Check results
# end_time = time.time()
# print(f"ELAPSED TIME: {end_time-start_time}")
# # Check results
test_multiple_deployments()
# test_multiple_deployments()
### FUNCTION CALLING
def test_function_calling():

View file

@ -1854,7 +1854,7 @@ def get_optional_params( # use the openai defaults
raise UnsupportedParamsError(status_code=500, message=f"Function calling is not supported by {custom_llm_provider}. To add it to the prompt, set `litellm.add_function_to_prompt = True`.")
def _check_valid_arg(supported_params):
print_verbose(f"\nLiteLLM completion() model= {model}")
print_verbose(f"\nLiteLLM completion() model= {model}; provider = {custom_llm_provider}")
print_verbose(f"\nLiteLLM: Params passed to completion() {passed_params}")
print_verbose(f"\nLiteLLM: Non-Default params passed to completion() {non_default_params}")
unsupported_params = {}
@ -1867,7 +1867,6 @@ def get_optional_params( # use the openai defaults
unsupported_params[k] = non_default_params[k]
if unsupported_params and not litellm.drop_params:
raise UnsupportedParamsError(status_code=500, message=f"{custom_llm_provider} does not support parameters: {unsupported_params}. To drop these, set `litellm.drop_params=True`.")
## raise exception if provider doesn't support passed in param
if custom_llm_provider == "anthropic":
## check if unsupported param passed in
@ -2158,7 +2157,7 @@ def get_optional_params( # use the openai defaults
optional_params["presence_penalty"] = presence_penalty
if stop is not None:
optional_params["stop_sequences"] = stop
elif model in litellm.aleph_alpha_models:
elif custom_llm_provider == "aleph_alpha":
supported_params = ["max_tokens", "stream", "top_p", "temperature", "presence_penalty", "frequency_penalty", "n", "stop"]
_check_valid_arg(supported_params=supported_params)
if max_tokens is not None:
@ -2193,7 +2192,7 @@ def get_optional_params( # use the openai defaults
optional_params["repeat_penalty"] = frequency_penalty
if stop is not None:
optional_params["stop_sequences"] = stop
elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud":
elif custom_llm_provider == "nlp_cloud":
supported_params = ["max_tokens", "stream", "temperature", "top_p", "presence_penalty", "frequency_penalty", "n", "stop"]
_check_valid_arg(supported_params=supported_params)
@ -2213,7 +2212,7 @@ def get_optional_params( # use the openai defaults
optional_params["num_return_sequences"] = n
if stop is not None:
optional_params["stop_sequences"] = stop
elif model in litellm.petals_models or custom_llm_provider == "petals":
elif custom_llm_provider == "petals":
supported_params = ["max_tokens", "temperature", "top_p", "stream"]
_check_valid_arg(supported_params=supported_params)
# max_new_tokens=1,temperature=0.9, top_p=0.6
@ -2228,11 +2227,59 @@ def get_optional_params( # use the openai defaults
elif custom_llm_provider == "deepinfra":
supported_params = ["temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user"]
_check_valid_arg(supported_params=supported_params)
if temperature is not None:
if temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1": # this model does no support temperature == 0
temperature = 0.0001 # close to 0
optional_params["temperature"] = temperature
if top_p:
optional_params["top_p"] = top_p
if n:
optional_params["n"] = n
if stream:
optional_params["stream"] = str
if stop:
optional_params["stop"] = stop
if max_tokens:
optional_params["max_tokens"] = max_tokens
if presence_penalty:
optional_params["presence_penalty"] = presence_penalty
if frequency_penalty:
optional_params["frequency_penalty"] = frequency_penalty
if logit_bias:
optional_params["logit_bias"] = logit_bias
if user:
optional_params["user"] = user
elif custom_llm_provider == "perplexity":
# Map the OpenAI-style arguments onto the request for Perplexity.
# Only the parameters named in supported_params are copied into optional_params;
# others (e.g. `stop`, `n`) are not forwarded by this branch.
supported_params = ["temperature", "top_p", "stream", "max_tokens", "presence_penalty", "frequency_penalty"]
# Raises UnsupportedParamsError for unsupported params unless litellm.drop_params is set.
_check_valid_arg(supported_params=supported_params)
if temperature is not None:
if temperature == 0 and model == "mistral-7b-instruct": # this model does not support temperature == 0
temperature = 0.0001 # close to 0
optional_params["temperature"] = temperature
# The remaining parameters are forwarded only when truthy.
if top_p:
optional_params["top_p"] = top_p
if stream:
optional_params["stream"] = stream
if max_tokens:
optional_params["max_tokens"] = max_tokens
if presence_penalty:
optional_params["presence_penalty"] = presence_penalty
if frequency_penalty:
optional_params["frequency_penalty"] = frequency_penalty
elif custom_llm_provider == "anyscale":
# Map the OpenAI-style arguments onto the request for Anyscale.
supported_params = ["temperature", "top_p", "stream", "max_tokens"]
# Raises UnsupportedParamsError for unsupported params unless litellm.drop_params is set.
_check_valid_arg(supported_params=supported_params)
# NOTE(review): this rebinds optional_params to the non_default_params dict itself,
# so every non-default param is carried along and the assignments below write into
# that same dict. Unlike the deepinfra/perplexity branches, this is not limited to
# supported_params - confirm this pass-through is intended.
optional_params = non_default_params
if temperature is not None:
if temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1": # this model does not support temperature == 0
temperature = 0.0001 # close to 0
optional_params["temperature"] = temperature
if top_p:
optional_params["top_p"] = top_p
if stream:
optional_params["stream"] = stream
if max_tokens:
optional_params["max_tokens"] = max_tokens
else: # assume passing in params for openai/azure openai
supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice", "max_retries"]
_check_valid_arg(supported_params=supported_params)
@ -2259,26 +2306,29 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
# perplexity is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.perplexity.ai
api_base = "https://api.perplexity.ai"
dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
custom_llm_provider = "custom_openai"
elif custom_llm_provider == "anyscale":
# anyscale is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
api_base = "https://api.endpoints.anyscale.com/v1"
dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
custom_llm_provider = "custom_openai"
elif custom_llm_provider == "deepinfra":
# deepinfra is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.deepinfra.com/v1/openai
api_base = "https://api.deepinfra.com/v1/openai"
dynamic_api_key = os.getenv("DEEPINFRA_API_KEY")
custom_llm_provider = "custom_openai"
return model, custom_llm_provider, dynamic_api_key, api_base
# check if api base is a known openai compatible endpoint
if api_base:
for endpoint in litellm.openai_compatible_endpoints:
if endpoint in api_base:
custom_llm_provider = "custom_openai"
if endpoint == "api.perplexity.ai":
if endpoint == "api.perplexity.ai":
custom_llm_provider = "perplexity"
dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
elif endpoint == "api.endpoints.anyscale.com/v1":
custom_llm_provider = "anyscale"
dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
elif endpoint == "api.deepinfra.com/v1/openai":
custom_llm_provider = "deepinfra"
dynamic_api_key = os.getenv("DEEPINFRA_API_KEY")
return model, custom_llm_provider, dynamic_api_key, api_base
# check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, etc.)