Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 18:24:20 +00:00
fix(utils.py): add param mapping for perplexity, anyscale, deepinfra
This commit is contained in:
  parent: e7bb4a0cbd
  commit: 10fe16c965

5 changed files with 141 additions and 83 deletions
@@ -144,7 +144,9 @@ for key, value in model_cost.items():
 # known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
 openai_compatible_endpoints: List = [
-    "api.perplexity.ai"
+    "api.perplexity.ai",
+    "api.endpoints.anyscale.com/v1",
+    "api.deepinfra.com/v1/openai"
 ]
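The entries in this list are matched as substrings of a caller-supplied api_base (see the get_llm_provider hunk at the end of this diff), so scheme prefixes and trailing paths don't get in the way. A minimal sketch of that membership test, assuming the list above (the standalone helper name is illustrative):

from typing import Optional

openai_compatible_endpoints = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
]

def known_endpoint(api_base: str) -> Optional[str]:
    # substring match: "https://api.perplexity.ai" and bare "api.perplexity.ai" both hit
    for endpoint in openai_compatible_endpoints:
        if endpoint in api_base:
            return endpoint
    return None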
@@ -504,13 +504,16 @@ def completion(
     elif (
         model in litellm.open_ai_chat_completion_models
         or custom_llm_provider == "custom_openai"
+        or custom_llm_provider == "deepinfra"
+        or custom_llm_provider == "perplexity"
+        or custom_llm_provider == "anyscale"
         or custom_llm_provider == "openai"
         or "ft:gpt-3.5-turbo" in model # finetune gpt-3.5-turbo
     ): # allow user to make an openai call with a custom base
         # note: if a user sets a custom base - we should ensure this works
         # allow for the setting of dynamic and stateful api-bases
         api_base = (
-            api_base
+            api_base # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api base from there
             or litellm.api_base
             or get_secret("OPENAI_API_BASE")
             or "https://api.openai.com/v1"
@@ -522,7 +525,7 @@ def completion(
         )
         # set API KEY
         api_key = (
-            api_key or
+            api_key or # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
+            dynamic_api_key or # allows us to read env variables for compatible openai api's like perplexity
             litellm.api_key or
             litellm.openai_key or
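Both the api_base and api_key resolutions lean on Python's short-circuiting `or`: the first truthy value wins, so an explicit argument beats the new dynamic_api_key, which beats the module-level setting, which beats environment variables and the hard-coded default. A reduced sketch of the same precedence chain (the standalone function is illustrative, not litellm's API):

import os
from typing import Optional

def resolve_api_base(arg_api_base: Optional[str] = None, module_api_base: Optional[str] = None) -> str:
    # first truthy value wins; None and "" both fall through to the next candidate
    return (
        arg_api_base
        or module_api_base
        or os.getenv("OPENAI_API_BASE")
        or "https://api.openai.com/v1"
    )

print(resolve_api_base())  # -> "https://api.openai.com/v1" unless overridden upstream

One caveat of this pattern: falsy-but-meaningful values such as an empty string are skipped, which is the intended behavior here.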
@@ -164,7 +164,9 @@ def test_completion_gpt4_vision():

 def test_completion_perplexity_api():
     try:
-        # litellm.set_verbose=True
+        litellm.set_verbose=True
+        litellm.num_retries = 0
+        litellm.drop_params = True
         messages=[{
             "role": "system",
             "content": "You're a good bot"
@@ -178,12 +180,12 @@ def test_completion_perplexity_api():
         response = completion(
             model="mistral-7b-instruct",
             messages=messages,
-            api_base="https://api.perplexity.ai")
+            api_base="https://api.perplexity.ai", stop="Hello")
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-# test_completion_perplexity_api()
+test_completion_perplexity_api()

 def test_completion_perplexity_api_2():
     try:
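The re-enabled test exercises the new drop-params path: stop is not in Perplexity's whitelist (see the get_optional_params hunk further down), so with litellm.drop_params = True the call should succeed with the parameter silently removed rather than raising. A hedged usage sketch of the same call outside pytest (requires PERPLEXITYAI_API_KEY in the environment; model availability on Perplexity's side is an assumption):

import litellm

litellm.drop_params = True  # drop params the target provider doesn't support
response = litellm.completion(
    model="mistral-7b-instruct",
    messages=[{"role": "user", "content": "Hello, world"}],
    api_base="https://api.perplexity.ai",
    stop="Hello",  # unsupported by perplexity; dropped instead of raising
)
print(response)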
@@ -56,87 +56,88 @@ load_dotenv()
 # test_openai_only()

-def test_multiple_deployments():
-    import concurrent, time
-    # litellm.set_verbose=True
-    futures = {}
-    model_list = [{ # list of model deployments
-        "model_name": "gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-v-2",
-            "api_key": os.getenv("AZURE_API_KEY"),
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE")
-        },
-        "tpm": 240000,
-        "rpm": 1800
-    }, {
-        "model_name": "gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-functioncalling",
-            "api_key": os.getenv("AZURE_API_KEY"),
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE")
-        },
-        "tpm": 240000,
-        "rpm": 1800
-    }, {
-        "model_name": "gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "gpt-3.5-turbo",
-            "api_key": os.getenv("OPENAI_API_KEY"),
-        },
-        "tpm": 1000000,
-        "rpm": 9000
-    }]
+# def test_multiple_deployments():
+#     import concurrent, time
+#     # litellm.set_verbose=True
+#     futures = {}
+#     model_list = [{ # list of model deployments
+#         "model_name": "gpt-3.5-turbo", # openai model name
+#         "litellm_params": { # params for litellm completion/embedding call
+#             "model": "azure/chatgpt-v-2",
+#             "api_key": os.getenv("AZURE_API_KEY"),
+#             "api_version": os.getenv("AZURE_API_VERSION"),
+#             "api_base": os.getenv("AZURE_API_BASE")
+#         },
+#         "tpm": 240000,
+#         "rpm": 1800
+#     }, {
+#         "model_name": "gpt-3.5-turbo", # openai model name
+#         "litellm_params": { # params for litellm completion/embedding call
+#             "model": "azure/chatgpt-functioncalling",
+#             "api_key": os.getenv("AZURE_API_KEY"),
+#             "api_version": os.getenv("AZURE_API_VERSION"),
+#             "api_base": os.getenv("AZURE_API_BASE")
+#         },
+#         "tpm": 240000,
+#         "rpm": 1800
+#     }, {
+#         "model_name": "gpt-3.5-turbo", # openai model name
+#         "litellm_params": { # params for litellm completion/embedding call
+#             "model": "gpt-3.5-turbo",
+#             "api_key": os.getenv("OPENAI_API_KEY"),
+#         },
+#         "tpm": 1000000,
+#         "rpm": 9000
+#     }]

-    router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT")), routing_strategy="latency-based-routing") # type: ignore
+# router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT")), routing_strategy="latency-based-routing") # type: ignore
+# # router = Router(model_list=model_list, redis_host=os.getenv("REDIS_HOST"), redis_password=os.getenv("REDIS_PASSWORD"), redis_port=int(os.getenv("REDIS_PORT"))) # type: ignore

-    results = []
-    with ThreadPoolExecutor(max_workers=10) as executor:
-        kwargs = {
-            "model": "gpt-3.5-turbo",
-            "messages": [{"role": "user", "content": """Context:
+# results = []
+# with ThreadPoolExecutor(max_workers=100) as executor:
+#     kwargs = {
+#         "model": "gpt-3.5-turbo",
+#         "messages": [{"role": "user", "content": """Context:

-In the historical era of Ancient Greece, a multitude of significant individuals lived, contributing immensely to various disciplines like science, politics, philosophy, and literature. For instance, Socrates, a renowned philosopher, primarily focused on ethics. His notable method, the Socratic Method, involved acknowledging one's own ignorance to stimulate critical thinking and illuminate ideas. His student, Plato, another prominent figure, founded the Academy in Athens. He proposed theories on justice, beauty, and equality, and also introduced the theory of forms, which is pivotal to understanding his philosophical insights. Another student of Socrates, Xenophon, distinguished himself more in the domain of history and military affairs.
+# In the historical era of Ancient Greece, a multitude of significant individuals lived, contributing immensely to various disciplines like science, politics, philosophy, and literature. For instance, Socrates, a renowned philosopher, primarily focused on ethics. His notable method, the Socratic Method, involved acknowledging one's own ignorance to stimulate critical thinking and illuminate ideas. His student, Plato, another prominent figure, founded the Academy in Athens. He proposed theories on justice, beauty, and equality, and also introduced the theory of forms, which is pivotal to understanding his philosophical insights. Another student of Socrates, Xenophon, distinguished himself more in the domain of history and military affairs.

-Aristotle, who studied under Plato, led an equally remarkable life. His extensive works have been influential across various domains, including science, logic, metaphysics, ethics, and politics. Perhaps most notably, a substantial portion of the Western intellectual tradition traces back to his writings. He later tutored Alexander the Great who went on to create one of the most vast empires in the world.
+# Aristotle, who studied under Plato, led an equally remarkable life. His extensive works have been influential across various domains, including science, logic, metaphysics, ethics, and politics. Perhaps most notably, a substantial portion of the Western intellectual tradition traces back to his writings. He later tutored Alexander the Great who went on to create one of the most vast empires in the world.

-In the domain of mathematics, Pythagoras and Euclid made significant contributions. Pythagoras is best known for the Pythagorean theorem, a fundamental principle in geometry, while Euclid, often regarded as the father of geometry, wrote "The Elements", a collection of definitions, axioms, theorems, and proofs.
+# In the domain of mathematics, Pythagoras and Euclid made significant contributions. Pythagoras is best known for the Pythagorean theorem, a fundamental principle in geometry, while Euclid, often regarded as the father of geometry, wrote "The Elements", a collection of definitions, axioms, theorems, and proofs.

-Apart from these luminaries, the period also saw a number of influential political figures. Pericles, a prominent and influential Greek statesman, orator, and general of Athens during the Golden Age, specifically between the Persian and Peloponnesian wars, played a significant role in developing the Athenian democracy.
+# Apart from these luminaries, the period also saw a number of influential political figures. Pericles, a prominent and influential Greek statesman, orator, and general of Athens during the Golden Age, specifically between the Persian and Peloponnesian wars, played a significant role in developing the Athenian democracy.

-The Ancient Greek era also witnessed extraordinary advancements in arts and literature. Homer, credited with the creation of the epic poems 'The Iliad' and 'The Odyssey,' is considered one of the greatest poets in history. The tragedies of Sophocles, Aeschylus, and Euripides left an indelible mark on the field of drama, and the comedies of Aristophanes remain influential even today.
+# The Ancient Greek era also witnessed extraordinary advancements in arts and literature. Homer, credited with the creation of the epic poems 'The Iliad' and 'The Odyssey,' is considered one of the greatest poets in history. The tragedies of Sophocles, Aeschylus, and Euripides left an indelible mark on the field of drama, and the comedies of Aristophanes remain influential even today.

----
-Question:
+# ---
+# Question:

-Who among the mentioned figures from Ancient Greece contributed to the domain of mathematics and what are their significant contributions?"""}],
-        }
+# Who among the mentioned figures from Ancient Greece contributed to the domain of mathematics and what are their significant contributions?"""}],
+#     }

-        start_time = time.time()
-        for _ in range(1000):
-            future = executor.submit(router.completion, **kwargs)
-            futures[future] = future
+#     start_time = time.time()
+#     for _ in range(1000):
+#         future = executor.submit(router.completion, **kwargs)
+#         futures[future] = future

-        # Retrieve the results from the futures
-        while futures:
-            done, not_done = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
-            for future in done:
-                try:
-                    result = future.result()
-                    results.append(result)
-                    del futures[future]
-                except Exception as e:
-                    print(f"Exception: {e}; traceback: {traceback.format_exc()}")
-                    del futures[future] # remove the done future
+#     # Retrieve the results from the futures
+#     while futures:
+#         done, not_done = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
+#         for future in done:
+#             try:
+#                 result = future.result()
+#                 results.append(result)
+#                 del futures[future]
+#             except Exception as e:
+#                 print(f"Exception: {e}; traceback: {traceback.format_exc()}")
+#                 del futures[future] # remove the done future

-    end_time = time.time()
-    print(f"ELAPSED TIME: {end_time-start_time}")
-    # Check results
+#     end_time = time.time()
+#     print(f"ELAPSED TIME: {end_time-start_time}")
+#     # Check results

-test_multiple_deployments()
+# test_multiple_deployments()
 ### FUNCTION CALLING

 def test_function_calling():
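Although the router load test is commented out by this commit, the pattern it used is worth keeping: submit many completion calls to a thread pool, then drain them with concurrent.futures.wait as they finish. A self-contained sketch of that drain loop (the worker here is a stand-in for router.completion, which needs live credentials):

import concurrent.futures
import time
from concurrent.futures import ThreadPoolExecutor

def fake_completion(i: int) -> str:
    # stand-in for executor.submit(router.completion, **kwargs)
    time.sleep(0.01)
    return f"response {i}"

results = []
futures = {}
start_time = time.time()
with ThreadPoolExecutor(max_workers=10) as executor:
    for i in range(100):
        future = executor.submit(fake_completion, i)
        futures[future] = future
    # drain futures as they complete instead of waiting for all of them
    while futures:
        done, _ = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
        for future in done:
            try:
                results.append(future.result())
            finally:
                del futures[future]  # remove the done future
print(f"ELAPSED TIME: {time.time() - start_time}")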
@@ -1854,7 +1854,7 @@ def get_optional_params( # use the openai defaults
         raise UnsupportedParamsError(status_code=500, message=f"Function calling is not supported by {custom_llm_provider}. To add it to the prompt, set `litellm.add_function_to_prompt = True`.")

     def _check_valid_arg(supported_params):
-        print_verbose(f"\nLiteLLM completion() model= {model}")
+        print_verbose(f"\nLiteLLM completion() model= {model}; provider = {custom_llm_provider}")
         print_verbose(f"\nLiteLLM: Params passed to completion() {passed_params}")
         print_verbose(f"\nLiteLLM: Non-Default params passed to completion() {non_default_params}")
         unsupported_params = {}
@@ -1867,7 +1867,6 @@ def get_optional_params( # use the openai defaults
                 unsupported_params[k] = non_default_params[k]
         if unsupported_params and not litellm.drop_params:
             raise UnsupportedParamsError(status_code=500, message=f"{custom_llm_provider} does not support parameters: {unsupported_params}. To drop these, set `litellm.drop_params=True`.")
-
     ## raise exception if provider doesn't support passed in param
     if custom_llm_provider == "anthropic":
         ## check if unsupported param passed in
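So _check_valid_arg collects whatever the caller passed that isn't on the provider's whitelist and raises unless litellm.drop_params is set. A simplified, self-contained sketch of that contract (the free-standing function and error class are illustrative, not litellm's exact internals, which close over the surrounding scope):

class UnsupportedParamsError(Exception):
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        super().__init__(message)

def check_valid_args(non_default_params: dict, supported_params: list, drop_params: bool) -> dict:
    unsupported = {k: v for k, v in non_default_params.items() if k not in supported_params}
    if unsupported and not drop_params:
        raise UnsupportedParamsError(500, f"unsupported parameters: {unsupported}; set litellm.drop_params=True to drop them")
    # with drop_params=True, unsupported keys are silently omitted
    return {k: v for k, v in non_default_params.items() if k in supported_params}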
@@ -2158,7 +2157,7 @@ def get_optional_params( # use the openai defaults
             optional_params["presence_penalty"] = presence_penalty
         if stop is not None:
             optional_params["stop_sequences"] = stop
-    elif model in litellm.aleph_alpha_models:
+    elif custom_llm_provider == "aleph_alpha":
         supported_params = ["max_tokens", "stream", "top_p", "temperature", "presence_penalty", "frequency_penalty", "n", "stop"]
         _check_valid_arg(supported_params=supported_params)
         if max_tokens is not None:
@@ -2193,7 +2192,7 @@ def get_optional_params( # use the openai defaults
             optional_params["repeat_penalty"] = frequency_penalty
         if stop is not None:
             optional_params["stop_sequences"] = stop
-    elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud":
+    elif custom_llm_provider == "nlp_cloud":
         supported_params = ["max_tokens", "stream", "temperature", "top_p", "presence_penalty", "frequency_penalty", "n", "stop"]
         _check_valid_arg(supported_params=supported_params)

@@ -2213,7 +2212,7 @@ def get_optional_params( # use the openai defaults
             optional_params["num_return_sequences"] = n
         if stop is not None:
             optional_params["stop_sequences"] = stop
-    elif model in litellm.petals_models or custom_llm_provider == "petals":
+    elif custom_llm_provider == "petals":
         supported_params = ["max_tokens", "temperature", "top_p", "stream"]
         _check_valid_arg(supported_params=supported_params)
         # max_new_tokens=1,temperature=0.9, top_p=0.6
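The three hunks above are one mechanical change repeated: dispatch on custom_llm_provider, which get_llm_provider resolves once up front, rather than re-testing membership in static per-provider model lists. A model name the lists don't know about then still routes correctly when its provider is given. A toy before/after of the dispatch shape (model names are placeholders):

aleph_alpha_models = ["luminous-base", "luminous-extended"]  # static list, easily stale

def old_dispatch(model: str) -> bool:
    return model in aleph_alpha_models  # misses any model not in the list

def new_dispatch(custom_llm_provider: str) -> bool:
    return custom_llm_provider == "aleph_alpha"  # works for any model on that provider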
@@ -2228,11 +2227,59 @@ def get_optional_params( # use the openai defaults
+    elif custom_llm_provider == "deepinfra":
+        supported_params = ["temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user"]
+        _check_valid_arg(supported_params=supported_params)
+        if temperature is not None:
+            if temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1": # this model does not support temperature == 0
+                temperature = 0.0001 # close to 0
+            optional_params["temperature"] = temperature
+        if top_p:
+            optional_params["top_p"] = top_p
+        if n:
+            optional_params["n"] = n
+        if stream:
+            optional_params["stream"] = stream
+        if stop:
+            optional_params["stop"] = stop
+        if max_tokens:
+            optional_params["max_tokens"] = max_tokens
+        if presence_penalty:
+            optional_params["presence_penalty"] = presence_penalty
+        if frequency_penalty:
+            optional_params["frequency_penalty"] = frequency_penalty
+        if logit_bias:
+            optional_params["logit_bias"] = logit_bias
+        if user:
+            optional_params["user"] = user
+    elif custom_llm_provider == "perplexity":
+        supported_params = ["temperature", "top_p", "stream", "max_tokens", "presence_penalty", "frequency_penalty"]
+        _check_valid_arg(supported_params=supported_params)
+        if temperature is not None:
+            if temperature == 0 and model == "mistral-7b-instruct": # this model does not support temperature == 0
+                temperature = 0.0001 # close to 0
+            optional_params["temperature"] = temperature
+        if top_p:
+            optional_params["top_p"] = top_p
+        if stream:
+            optional_params["stream"] = stream
+        if max_tokens:
+            optional_params["max_tokens"] = max_tokens
+        if presence_penalty:
+            optional_params["presence_penalty"] = presence_penalty
+        if frequency_penalty:
+            optional_params["frequency_penalty"] = frequency_penalty
+    elif custom_llm_provider == "anyscale":
+        supported_params = ["temperature", "top_p", "stream", "max_tokens"]
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = non_default_params
+        if temperature is not None:
+            if temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1": # this model does not support temperature == 0
+                temperature = 0.0001 # close to 0
+            optional_params["temperature"] = temperature
+        if top_p:
+            optional_params["top_p"] = top_p
+        if stream:
+            optional_params["stream"] = stream
+        if max_tokens:
+            optional_params["max_tokens"] = max_tokens
     else: # assume passing in params for openai/azure openai
         supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice", "max_retries"]
         _check_valid_arg(supported_params=supported_params)
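Note that Perplexity's whitelist omits stop, which is exactly what the updated test exercises: with litellm.drop_params = True the parameter is dropped instead of raising. The block is one pattern repeated per provider and can be read as a table. A minimal, self-contained sketch of that table-driven reading (the helper and its signature are illustrative; litellm's real code also restricts the temperature == 0 workaround to specific Mistral deployments):

SUPPORTED_PARAMS = {
    "deepinfra": ["temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user"],
    "perplexity": ["temperature", "top_p", "stream", "max_tokens", "presence_penalty", "frequency_penalty"],
    "anyscale": ["temperature", "top_p", "stream", "max_tokens"],
}

def map_params(provider: str, drop_unsupported: bool = False, **params) -> dict:
    supported = SUPPORTED_PARAMS[provider]
    passed = {k: v for k, v in params.items() if v is not None}
    unsupported = set(passed) - set(supported)
    if unsupported and not drop_unsupported:
        raise ValueError(f"{provider} does not support parameters: {sorted(unsupported)}")
    mapped = {k: v for k, v in passed.items() if k in supported}
    if mapped.get("temperature") == 0:
        mapped["temperature"] = 0.0001  # some hosted Mistral models reject temperature == 0
    return mapped

print(map_params("perplexity", drop_unsupported=True, stop="Hello", temperature=0))
# -> {'temperature': 0.0001}; "stop" was dropped rather than raising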
@@ -2259,26 +2306,29 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
         # perplexity is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.perplexity.ai
         api_base = "https://api.perplexity.ai"
         dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
         custom_llm_provider = "custom_openai"
     elif custom_llm_provider == "anyscale":
         # anyscale is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
         api_base = "https://api.endpoints.anyscale.com/v1"
         dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
         custom_llm_provider = "custom_openai"
     elif custom_llm_provider == "deepinfra":
         # deepinfra is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.deepinfra.com/v1/openai
         api_base = "https://api.deepinfra.com/v1/openai"
         dynamic_api_key = os.getenv("DEEPINFRA_API_KEY")
         custom_llm_provider = "custom_openai"
     return model, custom_llm_provider, dynamic_api_key, api_base

     # check if api base is a known openai compatible endpoint
     if api_base:
         for endpoint in litellm.openai_compatible_endpoints:
             if endpoint in api_base:
-                custom_llm_provider = "custom_openai"
-                if endpoint == "api.perplexity.ai":
+                if endpoint == "api.perplexity.ai":
+                    custom_llm_provider = "perplexity"
                     dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
+                elif endpoint == "api.endpoints.anyscale.com/v1":
+                    custom_llm_provider = "anyscale"
+                    dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
+                elif endpoint == "api.deepinfra.com/v1/openai":
+                    custom_llm_provider = "deepinfra"
+                    dynamic_api_key = os.getenv("DEEPINFRA_API_KEY")
                 return model, custom_llm_provider, dynamic_api_key, api_base

     # check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, etc.)
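Taken together, the new detection path reads as a lookup from endpoint substring to a (provider, API-key env var) pair. A condensed sketch of that flow using the values from the diff (the standalone function illustrates the logic; it is not litellm's public API):

import os
from typing import Optional, Tuple

ENDPOINT_PROVIDERS = {
    "api.perplexity.ai": ("perplexity", "PERPLEXITYAI_API_KEY"),
    "api.endpoints.anyscale.com/v1": ("anyscale", "ANYSCALE_API_KEY"),
    "api.deepinfra.com/v1/openai": ("deepinfra", "DEEPINFRA_API_KEY"),
}

def detect_provider(api_base: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    # substring match mirrors the loop over litellm.openai_compatible_endpoints
    if api_base:
        for endpoint, (provider, env_var) in ENDPOINT_PROVIDERS.items():
            if endpoint in api_base:
                return provider, os.getenv(env_var)
    return None, None

print(detect_provider("https://api.perplexity.ai"))  # -> ("perplexity", <key or None>)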