Merge branch 'main' into explicit-args-acomplete

Mateo Cámara 2024-01-09 13:07:37 +01:00 committed by GitHub
commit 203089e6c7
10 changed files with 271 additions and 477 deletions

View file

@@ -1,49 +0,0 @@
# Model Config
Model-specific branching can complicate your code and make errors harder to debug. Use a model config to centralize that logic.
### Usage
Handle prompt-size logic. Different models have different context windows; set `adapt_to_prompt_size` to pick a model from `available_models` whose context window fits the prompt (in case the current model's is too small).
```python
from litellm import completion_with_config
import os
config = {
"available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
"adapt_to_prompt_size": True, # 👈 key change
}
# set env var
os.environ["OPENAI_API_KEY"] = "your-api-key"
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```
[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
### Complete Config Structure
```python
config = {
    "default_fallback_models": # [Optional] List of model names to try if a call fails
    "available_models": # [Optional] List of all possible models you could call
    "adapt_to_prompt_size": # [Optional] True/False - select a model based on prompt size (picks from available_models)
    "model": {
        "model-name": {
            "needs_moderation": # [Optional] True/False - set True to call the OpenAI moderations endpoint before the completion call. Raises an exception if the input is flagged.
            "error_handling": {
                "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
                    "fallback_model": "" # str, name of the model to try instead when that error occurs
                }
            }
        }
    }
}
```
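To make the `error_handling` block concrete, here is a minimal sketch mirroring the config used in litellm's own tests: a fallback to the 16k variant when `ContextWindowExceededError` is raised.
```python
from litellm import completion_with_config

config = {
    "model": {
        "gpt-3.5-turbo": {
            "error_handling": {
                # On a context-window error, retry the call with the 16k variant.
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        }
    }
}

long_prompt = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": long_prompt, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```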

View file

@@ -12,6 +12,7 @@ LiteLLM maps exceptions across all providers to their OpenAI counterparts.
| 429 | RateLimitError |
| >=500 | InternalServerError |
| N/A | ContextWindowExceededError|
| 400 | ContentPolicyViolationError|
| N/A | APIConnectionError |
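With the row added above, provider-side safety rejections surface as `litellm.ContentPolicyViolationError` (a 400-level error), so they can be handled alongside the other mapped exceptions. A minimal sketch of catching it:
```python
import litellm

try:
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
    )
except litellm.ContentPolicyViolationError as e:
    # New mapping: the provider's safety system rejected the request (HTTP 400).
    print(f"content policy violation: {e}")
except litellm.RateLimitError:
    # 429s still map to RateLimitError, per the table above.
    pass
```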

View file

@@ -129,26 +129,6 @@ const sidebars = {
"secret",
"completion/token_usage",
"load_test",
{
type: 'category',
label: 'Tutorials',
items: [
'tutorials/azure_openai',
"tutorials/lm_evaluation_harness",
"tutorials/eval_suites",
'tutorials/oobabooga',
"tutorials/gradio_integration",
'tutorials/huggingface_codellama',
'tutorials/huggingface_tutorial',
'tutorials/TogetherAI_liteLLM',
'tutorials/finetuned_chat_gpt',
'tutorials/sagemaker_llms',
'tutorials/text_completion',
"tutorials/first_playground",
'tutorials/compare_llms',
"tutorials/model_fallbacks",
],
},
{
type: "category",
label: "Logging & Observability",
@@ -170,6 +150,23 @@ const sidebars = {
],
},
"caching/redis_cache",
{
type: 'category',
label: 'Tutorials',
items: [
'tutorials/azure_openai',
'tutorials/oobabooga',
"tutorials/gradio_integration",
'tutorials/huggingface_codellama',
'tutorials/huggingface_tutorial',
'tutorials/TogetherAI_liteLLM',
'tutorials/finetuned_chat_gpt',
'tutorials/sagemaker_llms',
'tutorials/text_completion',
"tutorials/first_playground",
"tutorials/model_fallbacks",
],
},
{
type: "category",
label: "LangChain, LlamaIndex Integration",

View file

@@ -500,7 +500,6 @@ from .utils import (
validate_environment,
check_valid_key,
get_llm_provider,
completion_with_config,
register_model,
encode,
decode,
@@ -544,6 +543,7 @@ from .exceptions import (
ServiceUnavailableError,
OpenAIError,
ContextWindowExceededError,
ContentPolicyViolationError,
BudgetExceededError,
APIError,
Timeout,

View file

@@ -108,6 +108,21 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
) # Call the base class constructor with the parameters it needs
class ContentPolicyViolationError(BadRequestError): # type: ignore
# Error code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'Your request was rejected as a result of our safety system. Image descriptions generated from your prompt may contain text that is not allowed by our safety system. If you believe this was done in error, your request may succeed if retried, or by adjusting your prompt.', 'param': None, 'type': 'invalid_request_error'}}
def __init__(self, message, model, llm_provider, response: httpx.Response):
self.status_code = 400
self.message = message
self.model = model
self.llm_provider = llm_provider
super().__init__(
message=self.message,
model=self.model, # type: ignore
llm_provider=self.llm_provider, # type: ignore
response=response,
) # Call the base class constructor with the parameters it needs
class ServiceUnavailableError(APIStatusError): # type: ignore
def __init__(self, message, llm_provider, model, response: httpx.Response):
self.status_code = 503
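Since `ContentPolicyViolationError` subclasses `BadRequestError`, existing `except BadRequestError` handlers keep catching these rejections; the dedicated class only lets callers special-case them. A small sketch, assuming both classes stay exported from the top-level `litellm` namespace:
```python
import litellm

# The new class slots under BadRequestError, so broad handlers still work.
assert issubclass(litellm.ContentPolicyViolationError, litellm.BadRequestError)

try:
    litellm.image_generation(prompt="a cute baby sea otter", model="dall-e-3")
except litellm.BadRequestError as e:
    if isinstance(e, litellm.ContentPolicyViolationError):
        # Special-case safety rejections, e.g. rewrite the prompt and retry.
        print("rejected by the provider's safety system")
    else:
        raise
```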

View file

@@ -1173,7 +1173,7 @@ def completion(
acompletion=acompletion,
logging_obj=logging,
custom_prompt_dict=custom_prompt_dict,
timeout=timeout
timeout=timeout,
)
if (
"stream" in optional_params
@@ -2894,6 +2894,7 @@ def image_generation(
Currently supports just Azure + OpenAI.
"""
try:
aimg_generation = kwargs.get("aimg_generation", False)
litellm_call_id = kwargs.get("litellm_call_id", None)
logger_fn = kwargs.get("logger_fn", None)
@@ -3046,6 +3047,14 @@ def image_generation(
)
return model_response
except Exception as e:
## Map to OpenAI Exception
raise exception_type(
model=model,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=locals(),
)
##### Health Endpoints #######################
@@ -3170,7 +3179,8 @@ def config_completion(**kwargs):
"No config path set, please set a config path using `litellm.config_path = 'path/to/config.json'`"
)
def stream_chunk_builder_text_completion(chunks: list, messages: Optional[List]=None):
def stream_chunk_builder_text_completion(chunks: list, messages: Optional[List] = None):
id = chunks[0]["id"]
object = chunks[0]["object"]
created = chunks[0]["created"]
@@ -3190,20 +3200,24 @@ def stream_chunk_builder_text_completion(chunks: list, messages: Optional[List]=
"text": None,
"index": 0,
"logprobs": logprobs,
"finish_reason": finish_reason
"finish_reason": finish_reason,
}
],
"usage": {
"prompt_tokens": None,
"completion_tokens": None,
"total_tokens": None
}
"total_tokens": None,
},
}
content_list = []
for chunk in chunks:
choices = chunk["choices"]
for choice in choices:
if choice is not None and hasattr(choice, "text") and choice.get("text") is not None:
if (
choice is not None
and hasattr(choice, "text")
and choice.get("text") is not None
):
_choice = choice.get("text")
content_list.append(_choice)
@@ -3235,13 +3249,18 @@ def stream_chunk_builder_text_completion(chunks: list, messages: Optional[List]=
)
return response
def stream_chunk_builder(chunks: list, messages: Optional[list] = None):
id = chunks[0]["id"]
object = chunks[0]["object"]
created = chunks[0]["created"]
model = chunks[0]["model"]
system_fingerprint = chunks[0].get("system_fingerprint", None)
if isinstance(chunks[0]["choices"][0], litellm.utils.TextChoices): # route to the text completion logic
if isinstance(
chunks[0]["choices"][0], litellm.utils.TextChoices
): # route to the text completion logic
return stream_chunk_builder_text_completion(chunks=chunks, messages=messages)
role = chunks[0]["choices"][0]["delta"]["role"]
finish_reason = chunks[-1]["choices"][0]["finish_reason"]
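The `TextChoices` check above routes streamed text-completion chunks to the dedicated builder. A usage sketch, assuming `stream_chunk_builder` remains exported from the top-level `litellm` namespace:
```python
import litellm

messages = [{"role": "user", "content": "Write a haiku about otters."}]
chunks = []
for chunk in litellm.completion(model="gpt-3.5-turbo", messages=messages, stream=True):
    chunks.append(chunk)

# Reassemble the streamed pieces into one non-streaming-shaped response;
# text-completion chunks are detected and sent through
# stream_chunk_builder_text_completion instead.
full_response = litellm.stream_chunk_builder(chunks, messages=messages)
print(full_response.choices[0].message.content)
```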

View file

@@ -1,118 +0,0 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import completion_with_config
config = {
"default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
"model": {
"claude-instant-1": {"needs_moderation": True},
"gpt-3.5-turbo": {
"error_handling": {
"ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
}
},
},
}
def test_config_context_window_exceeded():
try:
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(
model="gpt-3.5-turbo", messages=messages, config=config
)
print(response)
except Exception as e:
print(f"Exception: {e}")
pytest.fail(f"An exception occurred: {e}")
# test_config_context_window_exceeded()
def test_config_context_moderation():
try:
messages = [{"role": "user", "content": "I want to kill them."}]
response = completion_with_config(
model="claude-instant-1", messages=messages, config=config
)
print(response)
except Exception as e:
print(f"Exception: {e}")
pytest.fail(f"An exception occurred: {e}")
# test_config_context_moderation()
def test_config_context_default_fallback():
try:
messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = completion_with_config(
model="claude-instant-1",
messages=messages,
config=config,
api_key="bad-key",
)
print(response)
except Exception as e:
print(f"Exception: {e}")
pytest.fail(f"An exception occurred: {e}")
# test_config_context_default_fallback()
config = {
"default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
"available_models": [
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301",
"gpt-3.5-turbo-0613",
"gpt-4",
"gpt-4-0314",
"gpt-4-0613",
"j2-ultra",
"command-nightly",
"togethercomputer/llama-2-70b-chat",
"chat-bison",
"chat-bison@001",
"claude-2",
],
"adapt_to_prompt_size": True, # type: ignore
"model": {
"claude-instant-1": {"needs_moderation": True},
"gpt-3.5-turbo": {
"error_handling": {
"ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
}
},
},
}
def test_config_context_adapt_to_prompt():
try:
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(
model="gpt-3.5-turbo", messages=messages, config=config
)
print(response)
except Exception as e:
print(f"Exception: {e}")
pytest.fail(f"An exception occurred: {e}")
test_config_context_adapt_to_prompt()

View file

@@ -352,6 +352,25 @@ def test_completion_mistral_exception():
# test_completion_mistral_exception()
def test_content_policy_exceptionimage_generation_openai():
try:
# this is only a test - we needed some way to invoke the exception :(
litellm.set_verbose = True
response = litellm.image_generation(
prompt="where do i buy lethal drugs from", model="dall-e-3"
)
print(f"response: {response}")
assert len(response.data) > 0
except litellm.ContentPolicyViolationError as e:
print("caught a content policy violation error! Passed")
pass
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
# test_content_policy_exceptionimage_generation_openai()
# # test_invalid_request_error(model="command-nightly")
# # Test 3: Rate Limit Errors
# def test_model_call(model):

View file

@@ -28,6 +28,8 @@ def test_image_generation_openai():
assert len(response.data) > 0
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:
pass # OpenAI randomly raises these errors - skip when they occur
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
@@ -38,15 +40,20 @@ def test_image_generation_openai():
def test_image_generation_azure():
try:
response = litellm.image_generation(
prompt="A cute baby sea otter", model="azure/", api_version="2023-06-01-preview"
prompt="A cute baby sea otter",
model="azure/",
api_version="2023-06-01-preview",
)
print(f"response: {response}")
assert len(response.data) > 0
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:
pass # Azure randomly raises these errors - skip when they occur
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
# test_image_generation_azure()
@@ -64,6 +71,8 @@ def test_image_generation_azure_dall_e_3():
assert len(response.data) > 0
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:
pass # OpenAI randomly raises these errors - skip when they occur
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
@@ -79,9 +88,12 @@ async def test_async_image_generation_openai():
assert len(response.data) > 0
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:
pass # OpenAI randomly raises these errors - skip when they occur
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
# asyncio.run(test_async_image_generation_openai())
@@ -94,5 +106,7 @@ async def test_async_image_generation_azure():
print(f"response: {response}")
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:
pass # Azure randomly raises these errors - skip when they occur
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")

View file

@@ -60,6 +60,7 @@ from .exceptions import (
ServiceUnavailableError,
OpenAIError,
ContextWindowExceededError,
ContentPolicyViolationError,
Timeout,
APIConnectionError,
APIError,
@@ -5551,6 +5552,17 @@ def exception_type(
model=model,
response=original_exception.response,
)
elif (
"invalid_request_error" in error_str
and "content_policy_violation" in error_str
):
exception_mapping_worked = True
raise ContentPolicyViolationError(
message=f"OpenAIException - {original_exception.message}",
llm_provider="openai",
model=model,
response=original_exception.response,
)
elif (
"invalid_request_error" in error_str
and "Incorrect API key provided" not in error_str
@@ -6500,6 +6512,17 @@ def exception_type(
model=model,
response=original_exception.response,
)
elif (
"invalid_request_error" in error_str
and "content_policy_violation" in error_str
):
exception_mapping_worked = True
raise ContentPolicyViolationError(
message=f"AzureException - {original_exception.message}",
llm_provider="azure",
model=model,
response=original_exception.response,
)
elif "invalid_request_error" in error_str:
exception_mapping_worked = True
raise BadRequestError(
@@ -7846,133 +7869,6 @@ def read_config_args(config_path) -> dict:
########## experimental completion variants ############################
def completion_with_config(config: Union[dict, str], **kwargs):
"""
Generate a litellm.completion() using a config dict and all supported completion args
Example config:
config = {
"default_fallback_models": # [Optional] List of model names to try if a call fails
"available_models": # [Optional] List of all possible models you could call
"adapt_to_prompt_size": # [Optional] True/False - if you want to select model based on prompt size (will pick from available_models)
"model": {
"model-name": {
"needs_moderation": # [Optional] True/False - if you want to call openai moderations endpoint before making completion call. Will raise exception, if flagged.
"error_handling": {
"error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
"fallback_model": "" # str, name of the model it should try instead, when that error occurs
}
}
}
}
}
Parameters:
config (Union[dict, str]): A configuration for litellm
**kwargs: Additional keyword arguments for litellm.completion
Returns:
litellm.ModelResponse: A ModelResponse with the generated completion
"""
if config is not None:
if isinstance(config, str):
config = read_config_args(config)
elif isinstance(config, dict):
config = config
else:
raise Exception("Config path must be a string or a dictionary.")
else:
raise Exception("Config path not passed in.")
if config is None:
raise Exception("No completion config in the config file")
models_with_config = config["model"].keys()
model = kwargs["model"]
messages = kwargs["messages"]
## completion config
fallback_models = config.get("default_fallback_models", None)
available_models = config.get("available_models", None)
adapt_to_prompt_size = config.get("adapt_to_prompt_size", False)
trim_messages_flag = config.get("trim_messages", False)
prompt_larger_than_model = False
max_model = model
try:
max_tokens = litellm.get_max_tokens(model)["max_tokens"]
except:
max_tokens = 2048 # assume curr model's max window is 2048 tokens
if adapt_to_prompt_size:
## Pick model based on token window
prompt_tokens = litellm.token_counter(
model="gpt-3.5-turbo",
text="".join(message["content"] for message in messages),
)
try:
curr_max_tokens = litellm.get_max_tokens(model)["max_tokens"]
except:
curr_max_tokens = 2048
if curr_max_tokens < prompt_tokens:
prompt_larger_than_model = True
for available_model in available_models:
try:
curr_max_tokens = litellm.get_max_tokens(available_model)[
"max_tokens"
]
if curr_max_tokens > max_tokens:
max_tokens = curr_max_tokens
max_model = available_model
if curr_max_tokens > prompt_tokens:
model = available_model
prompt_larger_than_model = False
except:
continue
if prompt_larger_than_model:
messages = trim_messages(messages=messages, model=max_model)
kwargs["messages"] = messages
kwargs["model"] = model
try:
if model in models_with_config:
## Moderation check
if config["model"][model].get("needs_moderation"):
input = " ".join(message["content"] for message in messages)
response = litellm.moderation(input=input)
flagged = response["results"][0]["flagged"]
if flagged:
raise Exception("This response was flagged as inappropriate")
## Model-specific Error Handling
error_handling = None
if config["model"][model].get("error_handling"):
error_handling = config["model"][model]["error_handling"]
try:
response = litellm.completion(**kwargs)
return response
except Exception as e:
exception_name = type(e).__name__
fallback_model = None
if error_handling and exception_name in error_handling:
error_handler = error_handling[exception_name]
# either switch model or api key
fallback_model = error_handler.get("fallback_model", None)
if fallback_model:
kwargs["model"] = fallback_model
return litellm.completion(**kwargs)
raise e
else:
return litellm.completion(**kwargs)
except Exception as e:
if fallback_models:
model = fallback_models.pop(0)
return completion_with_fallbacks(
model=model, messages=messages, fallbacks=fallback_models
)
raise e
def completion_with_fallbacks(**kwargs):
nested_kwargs = kwargs.pop("kwargs", {})
response = None
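For reference, the removed `completion_with_config` bundled three behaviours: prompt-size adaptation over `available_models`, an optional moderation pre-check, and per-model `error_handling` fallbacks. A condensed, hand-rolled sketch of just the prompt-size selection it performed (assuming `litellm.get_max_tokens` still returns a dict with a `"max_tokens"` key, as the removed code expects; message trimming is omitted):
```python
import litellm

def pick_model_for_prompt(messages, available_models, default_model="gpt-3.5-turbo"):
    # Count prompt tokens the same way the removed helper did.
    prompt_tokens = litellm.token_counter(
        model="gpt-3.5-turbo",
        text="".join(message["content"] for message in messages),
    )
    for candidate in available_models:
        try:
            if litellm.get_max_tokens(candidate)["max_tokens"] > prompt_tokens:
                return candidate  # first model whose context window fits the prompt
        except Exception:
            continue  # unknown model - skip it, as the removed code did
    return default_model  # nothing fits; trim_messages() would be the next step

sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
model = pick_model_for_prompt(
    messages, ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"]
)
response = litellm.completion(model=model, messages=messages)
```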