Merge branch 'main' into explicit-args-acomplete

Mateo Cámara 2024-01-09 13:07:37 +01:00 committed by GitHub
commit 203089e6c7
10 changed files with 271 additions and 477 deletions

View file

@@ -1,49 +0,0 @@
# Model Config
Model-specific branching can complicate your code and make errors harder to debug. Use a model config to centralize that logic.
### Usage
Handle prompt-size logic. Different models have different context windows; set `adapt_to_prompt_size` to pick a model from `available_models` whose context window fits the prompt (in case the current model's is too small).
```python
from litellm import completion_with_config
import os
config = {
"available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
"adapt_to_prompt_size": True, # 👈 key change
}
# set env var
os.environ["OPENAI_API_KEY"] = "your-api-key"
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```
[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
### Complete Config Structure
```python
config = {
    "default_fallback_models": # [Optional] List of model names to try if a call fails
    "available_models": # [Optional] List of all possible models you could call
    "adapt_to_prompt_size": # [Optional] True/False - select a model based on prompt size (picks from available_models)
    "model": {
        "model-name": {
            "needs_moderation": # [Optional] True/False - set True to call the OpenAI moderations endpoint before the completion call. Raises an exception if the input is flagged.
            "error_handling": {
                "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
                    "fallback_model": "" # str, name of the model to try instead when that error occurs
                }
            }
        }
    }
}
```
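To make the `error_handling` block concrete, here is a minimal sketch mirroring the config used in litellm's own tests: a fallback to the 16k variant when `ContextWindowExceededError` is raised.
```python
from litellm import completion_with_config

config = {
    "model": {
        "gpt-3.5-turbo": {
            "error_handling": {
                # On a context-window error, retry the call with the 16k variant.
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        }
    }
}

long_prompt = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": long_prompt, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```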

View file

@@ -12,6 +12,7 @@ LiteLLM maps exceptions across all providers to their OpenAI counterparts.
| 429 | RateLimitError |
| >=500 | InternalServerError |
| N/A | ContextWindowExceededError|
| 400 | ContentPolicyViolationError|
| N/A | APIConnectionError |
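With the row added above, provider-side safety rejections surface as `litellm.ContentPolicyViolationError` (a 400-level error), so they can be handled alongside the other mapped exceptions. A minimal sketch of catching it:
```python
import litellm

try:
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
    )
except litellm.ContentPolicyViolationError as e:
    # New mapping: the provider's safety system rejected the request (HTTP 400).
    print(f"content policy violation: {e}")
except litellm.RateLimitError:
    # 429s still map to RateLimitError, per the table above.
    pass
```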

View file

@@ -129,26 +129,6 @@ const sidebars = {
"secret",
"completion/token_usage",
"load_test",
{
type: 'category',
label: 'Tutorials',
items: [
'tutorials/azure_openai',
"tutorials/lm_evaluation_harness",
"tutorials/eval_suites",
'tutorials/oobabooga',
"tutorials/gradio_integration",
'tutorials/huggingface_codellama',
'tutorials/huggingface_tutorial',
'tutorials/TogetherAI_liteLLM',
'tutorials/finetuned_chat_gpt',
'tutorials/sagemaker_llms',
'tutorials/text_completion',
"tutorials/first_playground",
'tutorials/compare_llms',
"tutorials/model_fallbacks",
],
},
{
type: "category",
label: "Logging & Observability",
@@ -170,6 +150,23 @@ const sidebars = {
],
},
"caching/redis_cache",
{
type: 'category',
label: 'Tutorials',
items: [
'tutorials/azure_openai',
'tutorials/oobabooga',
"tutorials/gradio_integration",
'tutorials/huggingface_codellama',
'tutorials/huggingface_tutorial',
'tutorials/TogetherAI_liteLLM',
'tutorials/finetuned_chat_gpt',
'tutorials/sagemaker_llms',
'tutorials/text_completion',
"tutorials/first_playground",
"tutorials/model_fallbacks",
],
},
{
type: "category",
label: "LangChain, LlamaIndex Integration",

View file

@@ -500,7 +500,6 @@ from .utils import (
validate_environment,
check_valid_key,
get_llm_provider,
completion_with_config,
register_model,
encode,
decode,
@@ -544,6 +543,7 @@ from .exceptions import (
ServiceUnavailableError,
OpenAIError,
ContextWindowExceededError,
ContentPolicyViolationError,
BudgetExceededError,
APIError,
Timeout,

View file

@@ -108,6 +108,21 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
) # Call the base class constructor with the parameters it needs
class ContentPolicyViolationError(BadRequestError): # type: ignore
# Error code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'Your request was rejected as a result of our safety system. Image descriptions generated from your prompt may contain text that is not allowed by our safety system. If you believe this was done in error, your request may succeed if retried, or by adjusting your prompt.', 'param': None, 'type': 'invalid_request_error'}}
def __init__(self, message, model, llm_provider, response: httpx.Response):
self.status_code = 400
self.message = message
self.model = model
self.llm_provider = llm_provider
super().__init__(
message=self.message,
model=self.model, # type: ignore
llm_provider=self.llm_provider, # type: ignore
response=response,
) # Call the base class constructor with the parameters it needs
class ServiceUnavailableError(APIStatusError): # type: ignore
def __init__(self, message, llm_provider, model, response: httpx.Response):
self.status_code = 503
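Since `ContentPolicyViolationError` subclasses `BadRequestError`, existing `except BadRequestError` handlers keep catching these rejections; the dedicated class only lets callers special-case them. A small sketch, assuming both classes stay exported from the top-level `litellm` namespace:
```python
import litellm

# The new class slots under BadRequestError, so broad handlers still work.
assert issubclass(litellm.ContentPolicyViolationError, litellm.BadRequestError)

try:
    litellm.image_generation(prompt="a cute baby sea otter", model="dall-e-3")
except litellm.BadRequestError as e:
    if isinstance(e, litellm.ContentPolicyViolationError):
        # Special-case safety rejections, e.g. rewrite the prompt and retry.
        print("rejected by the provider's safety system")
    else:
        raise
```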

View file

@@ -1173,7 +1173,7 @@ def completion(
acompletion=acompletion,
logging_obj=logging,
custom_prompt_dict=custom_prompt_dict,
timeout=timeout
timeout=timeout,
)
if (
"stream" in optional_params
@@ -2894,6 +2894,7 @@ def image_generation(
Currently supports just Azure + OpenAI.
"""
try:
aimg_generation = kwargs.get("aimg_generation", False)
litellm_call_id = kwargs.get("litellm_call_id", None)
logger_fn = kwargs.get("logger_fn", None)
@@ -3046,6 +3047,14 @@ def image_generation(
)
return model_response
except Exception as e:
## Map to OpenAI Exception
raise exception_type(
model=model,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=locals(),
)
##### Health Endpoints #######################
@@ -3170,7 +3179,8 @@ def config_completion(**kwargs):
"No config path set, please set a config path using `litellm.config_path = 'path/to/config.json'`"
)
def stream_chunk_builder_text_completion(chunks: list, messages: Optional[List]=None):
def stream_chunk_builder_text_completion(chunks: list, messages: Optional[List] = None):
id = chunks[0]["id"]
object = chunks[0]["object"]
created = chunks[0]["created"]
@@ -3190,20 +3200,24 @@ def stream_chunk_builder_text_completion(chunks: list, messages: Optional[List]=
"text": None,
"index": 0,
"logprobs": logprobs,
"finish_reason": finish_reason
"finish_reason": finish_reason,
}
],
"usage": {
"prompt_tokens": None,
"completion_tokens": None,
"total_tokens": None
}
"total_tokens": None,
},
}
content_list = []
for chunk in chunks:
choices = chunk["choices"]
for choice in choices:
if choice is not None and hasattr(choice, "text") and choice.get("text") is not None:
if (
choice is not None
and hasattr(choice, "text")
and choice.get("text") is not None
):
_choice = choice.get("text")
content_list.append(_choice)
@@ -3235,13 +3249,18 @@ def stream_chunk_builder_text_completion(chunks: list, messages: Optional[List]=
)
return response
def stream_chunk_builder(chunks: list, messages: Optional[list] = None):
id = chunks[0]["id"]
object = chunks[0]["object"]
created = chunks[0]["created"]
model = chunks[0]["model"]
system_fingerprint = chunks[0].get("system_fingerprint", None)
if isinstance(chunks[0]["choices"][0], litellm.utils.TextChoices): # route to the text completion logic
if isinstance(
chunks[0]["choices"][0], litellm.utils.TextChoices
): # route to the text completion logic
return stream_chunk_builder_text_completion(chunks=chunks, messages=messages)
role = chunks[0]["choices"][0]["delta"]["role"]
finish_reason = chunks[-1]["choices"][0]["finish_reason"]
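The `TextChoices` check above routes streamed text-completion chunks to the dedicated builder. A usage sketch, assuming `stream_chunk_builder` remains exported from the top-level `litellm` namespace:
```python
import litellm

messages = [{"role": "user", "content": "Write a haiku about otters."}]
chunks = []
for chunk in litellm.completion(model="gpt-3.5-turbo", messages=messages, stream=True):
    chunks.append(chunk)

# Reassemble the streamed pieces into one non-streaming-shaped response;
# text-completion chunks are detected and sent through
# stream_chunk_builder_text_completion instead.
full_response = litellm.stream_chunk_builder(chunks, messages=messages)
print(full_response.choices[0].message.content)
```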

View file

@@ -1,118 +0,0 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import completion_with_config
config = {
"default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
"model": {
"claude-instant-1": {"needs_moderation": True},
"gpt-3.5-turbo": {
"error_handling": {
"ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
}
},
},
}
def test_config_context_window_exceeded():
try:
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(
model="gpt-3.5-turbo", messages=messages, config=config
)
print(response)
except Exception as e:
print(f"Exception: {e}")
pytest.fail(f"An exception occurred: {e}")
# test_config_context_window_exceeded()
def test_config_context_moderation():
try:
messages = [{"role": "user", "content": "I want to kill them."}]
response = completion_with_config(
model="claude-instant-1", messages=messages, config=config
)
print(response)
except Exception as e:
print(f"Exception: {e}")
pytest.fail(f"An exception occurred: {e}")
# test_config_context_moderation()
def test_config_context_default_fallback():
try:
messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = completion_with_config(
model="claude-instant-1",
messages=messages,
config=config,
api_key="bad-key",
)
print(response)
except Exception as e:
print(f"Exception: {e}")
pytest.fail(f"An exception occurred: {e}")
# test_config_context_default_fallback()
config = {
"default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
"available_models": [
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301",
"gpt-3.5-turbo-0613",
"gpt-4",
"gpt-4-0314",
"gpt-4-0613",
"j2-ultra",
"command-nightly",
"togethercomputer/llama-2-70b-chat",
"chat-bison",
"chat-bison@001",
"claude-2",
],
"adapt_to_prompt_size": True, # type: ignore
"model": {
"claude-instant-1": {"needs_moderation": True},
"gpt-3.5-turbo": {
"error_handling": {
"ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
}
},
},
}
def test_config_context_adapt_to_prompt():
try:
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(
model="gpt-3.5-turbo", messages=messages, config=config
)
print(response)
except Exception as e:
print(f"Exception: {e}")
pytest.fail(f"An exception occurred: {e}")
test_config_context_adapt_to_prompt()

View file

@@ -352,6 +352,25 @@ def test_completion_mistral_exception():
# test_completion_mistral_exception()
def test_content_policy_exceptionimage_generation_openai():
try:
# this is only a test - we needed some way to invoke the exception :(
litellm.set_verbose = True
response = litellm.image_generation(
prompt="where do i buy lethal drugs from", model="dall-e-3"
)
print(f"response: {response}")
assert len(response.data) > 0
except litellm.ContentPolicyViolationError as e:
print("caught a content policy violation error! Passed")
pass
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
# test_content_policy_exceptionimage_generation_openai()
# # test_invalid_request_error(model="command-nightly")
# # Test 3: Rate Limit Errors
# def test_model_call(model):

View file

@@ -28,6 +28,8 @@ def test_image_generation_openai():
assert len(response.data) > 0
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:
pass # OpenAI randomly raises these errors - skip when they occur
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
@@ -38,15 +40,20 @@ def test_image_generation_openai():
def test_image_generation_azure():
try:
response = litellm.image_generation(
prompt="A cute baby sea otter", model="azure/", api_version="2023-06-01-preview"
prompt="A cute baby sea otter",
model="azure/",
api_version="2023-06-01-preview",
)
print(f"response: {response}")
assert len(response.data) > 0
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:
pass # Azure randomly raises these errors - skip when they occur
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
# test_image_generation_azure()
@@ -64,6 +71,8 @@ def test_image_generation_azure_dall_e_3():
assert len(response.data) > 0
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:
pass # OpenAI randomly raises these errors - skip when they occur
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
@@ -79,9 +88,12 @@ async def test_async_image_generation_openai():
assert len(response.data) > 0
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:
pass # OpenAI randomly raises these errors - skip when they occur
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
# asyncio.run(test_async_image_generation_openai())
@@ -94,5 +106,7 @@ async def test_async_image_generation_azure():
print(f"response: {response}")
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:
pass # Azure randomly raises these errors - skip when they occur
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")

View file

@@ -60,6 +60,7 @@ from .exceptions import (
ServiceUnavailableError,
OpenAIError,
ContextWindowExceededError,
ContentPolicyViolationError,
Timeout,
APIConnectionError,
APIError,
@@ -5551,6 +5552,17 @@ def exception_type(
model=model,
response=original_exception.response,
)
elif (
"invalid_request_error" in error_str
and "content_policy_violation" in error_str
):
exception_mapping_worked = True
raise ContentPolicyViolationError(
message=f"OpenAIException - {original_exception.message}",
llm_provider="openai",
model=model,
response=original_exception.response,
)
elif (
"invalid_request_error" in error_str
and "Incorrect API key provided" not in error_str
@@ -6500,6 +6512,17 @@ def exception_type(
model=model,
response=original_exception.response,
)
elif (
"invalid_request_error" in error_str
and "content_policy_violation" in error_str
):
exception_mapping_worked = True
raise ContentPolicyViolationError(
message=f"AzureException - {original_exception.message}",
llm_provider="azure",
model=model,
response=original_exception.response,
)
elif "invalid_request_error" in error_str:
exception_mapping_worked = True
raise BadRequestError(
@@ -7846,133 +7869,6 @@ def read_config_args(config_path) -> dict:
########## experimental completion variants ############################
def completion_with_config(config: Union[dict, str], **kwargs):
"""
Generate a litellm.completion() using a config dict and all supported completion args
Example config:
config = {
"default_fallback_models": # [Optional] List of model names to try if a call fails
"available_models": # [Optional] List of all possible models you could call
"adapt_to_prompt_size": # [Optional] True/False - if you want to select model based on prompt size (will pick from available_models)
"model": {
"model-name": {
"needs_moderation": # [Optional] True/False - if you want to call openai moderations endpoint before making completion call. Will raise exception, if flagged.
"error_handling": {
"error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
"fallback_model": "" # str, name of the model it should try instead, when that error occurs
}
}
}
}
}
Parameters:
config (Union[dict, str]): A configuration for litellm
**kwargs: Additional keyword arguments for litellm.completion
Returns:
litellm.ModelResponse: A ModelResponse with the generated completion
"""
if config is not None:
if isinstance(config, str):
config = read_config_args(config)
elif isinstance(config, dict):
config = config
else:
raise Exception("Config path must be a string or a dictionary.")
else:
raise Exception("Config path not passed in.")
if config is None:
raise Exception("No completion config in the config file")
models_with_config = config["model"].keys()
model = kwargs["model"]
messages = kwargs["messages"]
## completion config
fallback_models = config.get("default_fallback_models", None)
available_models = config.get("available_models", None)
adapt_to_prompt_size = config.get("adapt_to_prompt_size", False)
trim_messages_flag = config.get("trim_messages", False)
prompt_larger_than_model = False
max_model = model
try:
max_tokens = litellm.get_max_tokens(model)["max_tokens"]
except:
max_tokens = 2048 # assume curr model's max window is 2048 tokens
if adapt_to_prompt_size:
## Pick model based on token window
prompt_tokens = litellm.token_counter(
model="gpt-3.5-turbo",
text="".join(message["content"] for message in messages),
)
try:
curr_max_tokens = litellm.get_max_tokens(model)["max_tokens"]
except:
curr_max_tokens = 2048
if curr_max_tokens < prompt_tokens:
prompt_larger_than_model = True
for available_model in available_models:
try:
curr_max_tokens = litellm.get_max_tokens(available_model)[
"max_tokens"
]
if curr_max_tokens > max_tokens:
max_tokens = curr_max_tokens
max_model = available_model
if curr_max_tokens > prompt_tokens:
model = available_model
prompt_larger_than_model = False
except:
continue
if prompt_larger_than_model:
messages = trim_messages(messages=messages, model=max_model)
kwargs["messages"] = messages
kwargs["model"] = model
try:
if model in models_with_config:
## Moderation check
if config["model"][model].get("needs_moderation"):
input = " ".join(message["content"] for message in messages)
response = litellm.moderation(input=input)
flagged = response["results"][0]["flagged"]
if flagged:
raise Exception("This response was flagged as inappropriate")
## Model-specific Error Handling
error_handling = None
if config["model"][model].get("error_handling"):
error_handling = config["model"][model]["error_handling"]
try:
response = litellm.completion(**kwargs)
return response
except Exception as e:
exception_name = type(e).__name__
fallback_model = None
if error_handling and exception_name in error_handling:
error_handler = error_handling[exception_name]
# either switch model or api key
fallback_model = error_handler.get("fallback_model", None)
if fallback_model:
kwargs["model"] = fallback_model
return litellm.completion(**kwargs)
raise e
else:
return litellm.completion(**kwargs)
except Exception as e:
if fallback_models:
model = fallback_models.pop(0)
return completion_with_fallbacks(
model=model, messages=messages, fallbacks=fallback_models
)
raise e
def completion_with_fallbacks(**kwargs):
nested_kwargs = kwargs.pop("kwargs", {})
response = None
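For reference, the removed `completion_with_config` bundled three behaviours: prompt-size adaptation over `available_models`, an optional moderation pre-check, and per-model `error_handling` fallbacks. A condensed, hand-rolled sketch of just the prompt-size selection it performed (assuming `litellm.get_max_tokens` still returns a dict with a `"max_tokens"` key, as the removed code expects; message trimming is omitted):
```python
import litellm

def pick_model_for_prompt(messages, available_models, default_model="gpt-3.5-turbo"):
    # Count prompt tokens the same way the removed helper did.
    prompt_tokens = litellm.token_counter(
        model="gpt-3.5-turbo",
        text="".join(message["content"] for message in messages),
    )
    for candidate in available_models:
        try:
            if litellm.get_max_tokens(candidate)["max_tokens"] > prompt_tokens:
                return candidate  # first model whose context window fits the prompt
        except Exception:
            continue  # unknown model - skip it, as the removed code did
    return default_model  # nothing fits; trim_messages() would be the next step

sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
model = pick_model_for_prompt(
    messages, ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"]
)
response = litellm.completion(model=model, messages=messages)
```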