Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
(chore) remove deprecated completion_with_config() tests
This commit is contained in:
parent cc07ba1d6a
commit 248e5f3d92
4 changed files with 0 additions and 295 deletions
@@ -1,49 +0,0 @@

# Model Config

Model-specific changes can make our code complicated, making it harder to debug errors. Use model configs to simplify this.

### Usage

Handle prompt logic: different models have different context windows. Use `adapt_to_prompt_size` to select the right model for the prompt (in case the current model's context window is too small).

```python
from litellm import completion_with_config
import os

config = {
    "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "adapt_to_prompt_size": True, # 👈 key change
}

# set env vars
os.environ["OPENAI_API_KEY"] = "your-api-key"
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"

sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```

[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)

### Complete Config Structure

```python
config = {
    "default_fallback_models": # [Optional] list of model names to try if a call fails
    "available_models": # [Optional] list of all possible models you could call
    "adapt_to_prompt_size": # [Optional] True/False - select a model based on prompt size (picks from available_models)
    "model": {
        "model-name": {
            "needs_moderation": # [Optional] True/False - call the OpenAI moderations endpoint before the completion call; raises an exception if the input is flagged
            "error_handling": {
                "error-type": { # one of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
                    "fallback_model": "" # str, name of the model to try instead when that error occurs
                }
            }
        }
    }
}
```
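For reference, here is a filled-in config matching the structure above: a minimal sketch that reuses the configuration exercised by the removed tests further down (model names are illustrative).

```python
from litellm import completion_with_config

# Illustrative config: run moderation on claude-instant-1 inputs, and fall back
# to the 16k-context model when gpt-3.5-turbo raises a context-window error.
# Assumes OPENAI_API_KEY / ANTHROPIC_API_KEY are set in the environment.
config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1"],
    "model": {
        "claude-instant-1": {"needs_moderation": True},
        "gpt-3.5-turbo": {
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    },
}

messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```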
@@ -500,7 +500,6 @@ from .utils import (
     validate_environment,
     check_valid_key,
     get_llm_provider,
-    completion_with_config,
     register_model,
     encode,
     decode,
@@ -1,118 +0,0 @@

import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest
import litellm
from litellm import completion_with_config

config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
    "model": {
        "claude-instant-1": {"needs_moderation": True},
        "gpt-3.5-turbo": {
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    },
}


def test_config_context_window_exceeded():
    try:
        sample_text = "how does a court case get to the Supreme Court?" * 1000
        messages = [{"content": sample_text, "role": "user"}]
        response = completion_with_config(
            model="gpt-3.5-turbo", messages=messages, config=config
        )
        print(response)
    except Exception as e:
        print(f"Exception: {e}")
        pytest.fail(f"An exception occurred: {e}")


# test_config_context_window_exceeded()


def test_config_context_moderation():
    try:
        messages = [{"role": "user", "content": "I want to kill them."}]
        response = completion_with_config(
            model="claude-instant-1", messages=messages, config=config
        )
        print(response)
    except Exception as e:
        print(f"Exception: {e}")
        pytest.fail(f"An exception occurred: {e}")


# test_config_context_moderation()


def test_config_context_default_fallback():
    try:
        messages = [{"role": "user", "content": "Hey, how's it going?"}]
        response = completion_with_config(
            model="claude-instant-1",
            messages=messages,
            config=config,
            api_key="bad-key",
        )
        print(response)
    except Exception as e:
        print(f"Exception: {e}")
        pytest.fail(f"An exception occurred: {e}")


# test_config_context_default_fallback()


config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
    "available_models": [
        "gpt-3.5-turbo",
        "gpt-3.5-turbo-0301",
        "gpt-3.5-turbo-0613",
        "gpt-4",
        "gpt-4-0314",
        "gpt-4-0613",
        "j2-ultra",
        "command-nightly",
        "togethercomputer/llama-2-70b-chat",
        "chat-bison",
        "chat-bison@001",
        "claude-2",
    ],
    "adapt_to_prompt_size": True,  # type: ignore
    "model": {
        "claude-instant-1": {"needs_moderation": True},
        "gpt-3.5-turbo": {
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    },
}


def test_config_context_adapt_to_prompt():
    try:
        sample_text = "how does a court case get to the Supreme Court?" * 1000
        messages = [{"content": sample_text, "role": "user"}]
        response = completion_with_config(
            model="gpt-3.5-turbo", messages=messages, config=config
        )
        print(response)
    except Exception as e:
        print(f"Exception: {e}")
        pytest.fail(f"An exception occurred: {e}")


test_config_context_adapt_to_prompt()
litellm/utils.py (127 lines changed)
@@ -7843,133 +7843,6 @@ def read_config_args(config_path) -> dict:

########## experimental completion variants ############################


def completion_with_config(config: Union[dict, str], **kwargs):
    """
    Generate a litellm.completion() using a config dict and all supported completion args

    Example config;
    config = {
        "default_fallback_models": # [Optional] List of model names to try if a call fails
        "available_models": # [Optional] List of all possible models you could call
        "adapt_to_prompt_size": # [Optional] True/False - if you want to select model based on prompt size (will pick from available_models)
        "model": {
            "model-name": {
                "needs_moderation": # [Optional] True/False - if you want to call openai moderations endpoint before making completion call. Will raise exception, if flagged.
                "error_handling": {
                    "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
                        "fallback_model": "" # str, name of the model it should try instead, when that error occurs
                    }
                }
            }
        }
    }

    Parameters:
        config (Union[dict, str]): A configuration for litellm
        **kwargs: Additional keyword arguments for litellm.completion

    Returns:
        litellm.ModelResponse: A ModelResponse with the generated completion
    """
    if config is not None:
        if isinstance(config, str):
            config = read_config_args(config)
        elif isinstance(config, dict):
            config = config
        else:
            raise Exception("Config path must be a string or a dictionary.")
    else:
        raise Exception("Config path not passed in.")

    if config is None:
        raise Exception("No completion config in the config file")

    models_with_config = config["model"].keys()
    model = kwargs["model"]
    messages = kwargs["messages"]

    ## completion config
    fallback_models = config.get("default_fallback_models", None)
    available_models = config.get("available_models", None)
    adapt_to_prompt_size = config.get("adapt_to_prompt_size", False)
    trim_messages_flag = config.get("trim_messages", False)
    prompt_larger_than_model = False
    max_model = model
    try:
        max_tokens = litellm.get_max_tokens(model)["max_tokens"]
    except:
        max_tokens = 2048  # assume curr model's max window is 2048 tokens
    if adapt_to_prompt_size:
        ## Pick model based on token window
        prompt_tokens = litellm.token_counter(
            model="gpt-3.5-turbo",
            text="".join(message["content"] for message in messages),
        )
        try:
            curr_max_tokens = litellm.get_max_tokens(model)["max_tokens"]
        except:
            curr_max_tokens = 2048
        if curr_max_tokens < prompt_tokens:
            prompt_larger_than_model = True
            for available_model in available_models:
                try:
                    curr_max_tokens = litellm.get_max_tokens(available_model)[
                        "max_tokens"
                    ]
                    if curr_max_tokens > max_tokens:
                        max_tokens = curr_max_tokens
                        max_model = available_model
                    if curr_max_tokens > prompt_tokens:
                        model = available_model
                        prompt_larger_than_model = False
                except:
                    continue
        if prompt_larger_than_model:
            messages = trim_messages(messages=messages, model=max_model)
            kwargs["messages"] = messages

    kwargs["model"] = model
    try:
        if model in models_with_config:
            ## Moderation check
            if config["model"][model].get("needs_moderation"):
                input = " ".join(message["content"] for message in messages)
                response = litellm.moderation(input=input)
                flagged = response["results"][0]["flagged"]
                if flagged:
                    raise Exception("This response was flagged as inappropriate")

            ## Model-specific Error Handling
            error_handling = None
            if config["model"][model].get("error_handling"):
                error_handling = config["model"][model]["error_handling"]

            try:
                response = litellm.completion(**kwargs)
                return response
            except Exception as e:
                exception_name = type(e).__name__
                fallback_model = None
                if error_handling and exception_name in error_handling:
                    error_handler = error_handling[exception_name]
                    # either switch model or api key
                    fallback_model = error_handler.get("fallback_model", None)
                if fallback_model:
                    kwargs["model"] = fallback_model
                    return litellm.completion(**kwargs)
                raise e
        else:
            return litellm.completion(**kwargs)
    except Exception as e:
        if fallback_models:
            model = fallback_models.pop(0)
            return completion_with_fallbacks(
                model=model, messages=messages, fallbacks=fallback_models
            )
        raise e


def completion_with_fallbacks(**kwargs):
    nested_kwargs = kwargs.pop("kwargs", {})
    response = None
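The completion_with_fallbacks helper that the removed function delegated to stays in litellm/utils.py (its opening lines appear above as unchanged context). As a minimal usage sketch, the call below mirrors how completion_with_config invoked it; importing the helper from the package top level is an assumption, not something this diff shows.

```python
# Sketch only: mirrors the fallback call made inside the removed completion_with_config().
# Assumption: completion_with_fallbacks is importable from the litellm package;
# if not, it can be reached via litellm.utils.
from litellm import completion_with_fallbacks

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# Try gpt-3.5-turbo first; on failure, work through the fallback list in order.
response = completion_with_fallbacks(
    model="gpt-3.5-turbo",
    messages=messages,
    fallbacks=["gpt-3.5-turbo-16k", "claude-instant-1"],
)
print(response)
```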