Litellm ruff linting enforcement (#5992)

* ci(config.yml): add a 'check_code_quality' step

Addresses https://github.com/BerriAI/litellm/issues/5991

* ci(config.yml): check why circle ci doesn't pick up this test

* ci(config.yml): fix to run 'check_code_quality' tests

* fix(__init__.py): fix unprotected import

* fix(__init__.py): don't remove unused imports

* build(ruff.toml): update ruff.toml to ignore unused imports

* fix: ruff + pyright - fix linting + type-checking errors

* fix: fix linting errors

* fix(lago.py): fix module init error

* fix: fix linting errors

* ci(config.yml): cd into correct dir for checks

* fix(proxy_server.py): fix linting error

* fix(utils.py): fix bare except

Bare excepts cause ruff linting errors.

* fix: ruff - fix remaining linting errors

* fix(clickhouse.py): use standard logging object

* fix(__init__.py): fix unprotected import

* fix: ruff - fix linting errors

* fix: fix linting errors

* ci(config.yml): clean up code QA step (formatting handled in local_testing)

* fix(_health_endpoints.py): fix ruff linting errors

* ci(config.yml): just use ruff in check_code_quality pipeline for now

* build(custom_guardrail.py): include missing file

* style(embedding_handler.py): fix ruff check
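
Several of the commits above ("fix unprotected import") refer to top-level imports of optional dependencies that can fail when an extra is not installed. A minimal sketch of the usual guard pattern, using a hypothetical module name rather than the actual import fixed in litellm/__init__.py:

```python
# Sketch of a guarded ("protected") optional import. The dependency name is
# invented for illustration; it is not the import that was actually fixed.
from typing import Any, Optional

optional_client: Optional[Any] = None

try:
    import hypothetical_optional_sdk  # assumed optional extra

    optional_client = hypothetical_optional_sdk.Client()
except ImportError:
    # the package still imports cleanly when the extra is missing;
    # callers check for None before using the client
    optional_client = None
```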
Krish Dholakia 2024-10-01 16:44:20 -07:00 committed by GitHub
parent 3fc4ae0d65
commit d57be47b0f
263 changed files with 1687 additions and 3320 deletions
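
The "fix(clickhouse.py): use standard logging object" commit swaps ad-hoc error printing for the logging module. A rough sketch of that pattern, assuming a module-level logger; the logger litellm actually uses (verbose_logger in the diff below) may be configured differently:

```python
import logging

# hypothetical module-level logger; litellm's own verbose_logger may differ
logger = logging.getLogger(__name__)


def send_to_backend(payload: dict) -> None:
    raise RuntimeError("backend unavailable")  # stand-in failure for the demo


def log_event(payload: dict) -> None:
    try:
        send_to_backend(payload)
    except Exception:
        # logger.exception records the traceback automatically, so the
        # otherwise-unused `as e` binding can be dropped
        logger.exception("failed to send event payload=%s", payload)
```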


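Most of the diff below is mechanical and maps onto a handful of ruff rules (codes as ruff inherits them from pycodestyle and pyflakes): comparisons against True/False/None rewritten with identity checks, type() comparisons replaced by isinstance(), bare except clauses narrowed to except Exception, and f-strings without placeholders turned into plain strings. A condensed before/after sketch with invented names:

```python
# Condensed illustration of the recurring lint fixes; the names are invented,
# only the shape of each change mirrors the diff.

def before(stream, prompt, url):
    if stream == True:  # E712: equality comparison to True
        ...
    if type(prompt) == list:  # E721: type() comparison
        ...
    if url == None:  # E711: equality comparison to None
        ...
    try:
        ...
    except:  # E722: bare except
        print(f"token counting failed")  # F541: f-string with no placeholders


def after(stream, prompt, url):
    if stream is True:  # identity check for the boolean flag
        ...
    if isinstance(prompt, list):  # also accepts list subclasses
        ...
    if url is None:
        ...
    try:
        ...
    except Exception:  # lets SystemExit/KeyboardInterrupt propagate
        print("token counting failed")
```
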
@@ -106,6 +106,7 @@ from .llms.prompt_templates.factory import (
custom_prompt,
function_call_prompt,
map_system_message_pt,
ollama_pt,
prompt_factory,
stringify_json_tool_call_content,
)
@@ -150,7 +151,6 @@ from .types.utils import (
encoding = tiktoken.get_encoding("cl100k_base")
from litellm.utils import (
Choices,
CustomStreamWrapper,
EmbeddingResponse,
ImageResponse,
Message,
@@ -159,8 +159,6 @@ from litellm.utils import (
TextCompletionResponse,
TextCompletionStreamWrapper,
TranscriptionResponse,
get_secret,
read_config_args,
)
####### ENVIRONMENT VARIABLES ###################
@@ -214,7 +212,7 @@ class LiteLLM:
class Chat:
def __init__(self, params, router_obj: Optional[Any]):
self.params = params
if self.params.get("acompletion", False) == True:
if self.params.get("acompletion", False) is True:
self.params.pop("acompletion")
self.completions: Union[AsyncCompletions, Completions] = AsyncCompletions(
self.params, router_obj=router_obj
@@ -837,10 +835,10 @@ def completion(
model_response = ModelResponse()
setattr(model_response, "usage", litellm.Usage())
if (
kwargs.get("azure", False) == True
kwargs.get("azure", False) is True
): # don't remove flag check, to remain backwards compatible for repos like Codium
custom_llm_provider = "azure"
if deployment_id != None: # azure llms
if deployment_id is not None: # azure llms
model = deployment_id
custom_llm_provider = "azure"
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(
@@ -1156,7 +1154,7 @@ def completion(
client=client, # pass AsyncAzureOpenAI, AzureOpenAI client
)
if optional_params.get("stream", False) or acompletion == True:
if optional_params.get("stream", False) or acompletion is True:
## LOGGING
logging.post_call(
input=messages,
@@ -1278,7 +1276,7 @@ def completion(
if (
len(messages) > 0
and "content" in messages[0]
and type(messages[0]["content"]) == list
and isinstance(messages[0]["content"], list)
):
# text-davinci-003 can accept a string or array, if it's an array, assume the array is set in messages[0]['content']
# https://platform.openai.com/docs/api-reference/completions/create
@@ -1304,16 +1302,16 @@ def completion(
)
if (
optional_params.get("stream", False) == False
and acompletion == False
and text_completion == False
optional_params.get("stream", False) is False
and acompletion is False
and text_completion is False
):
# convert to chat completion response
_response = litellm.OpenAITextCompletionConfig().convert_to_chat_model_response_object(
response_object=_response, model_response_object=model_response
)
if optional_params.get("stream", False) or acompletion == True:
if optional_params.get("stream", False) or acompletion is True:
## LOGGING
logging.post_call(
input=messages,
@@ -1519,7 +1517,7 @@ def completion(
acompletion=acompletion,
)
if optional_params.get("stream", False) == True:
if optional_params.get("stream", False) is True:
## LOGGING
logging.post_call(
input=messages,
@@ -1566,7 +1564,7 @@ def completion(
custom_prompt_dict=custom_prompt_dict,
)
if "stream" in optional_params and optional_params["stream"] == True:
if "stream" in optional_params and optional_params["stream"] is True:
# don't try to access stream object,
## LOGGING
logging.post_call(
@@ -1575,7 +1573,7 @@ def completion(
original_response=model_response,
)
if optional_params.get("stream", False) or acompletion == True:
if optional_params.get("stream", False) or acompletion is True:
## LOGGING
logging.post_call(
input=messages,
@@ -1654,7 +1652,7 @@ def completion(
timeout=timeout,
client=client,
)
if optional_params.get("stream", False) or acompletion == True:
if optional_params.get("stream", False) or acompletion is True:
## LOGGING
logging.post_call(
input=messages,
@@ -1691,7 +1689,7 @@ def completion(
logging_obj=logging,
)
if "stream" in optional_params and optional_params["stream"] == True:
if "stream" in optional_params and optional_params["stream"] is True:
# don't try to access stream object,
response = CustomStreamWrapper(
response,
@@ -1700,7 +1698,7 @@ def completion(
logging_obj=logging,
)
if optional_params.get("stream", False) or acompletion == True:
if optional_params.get("stream", False) or acompletion is True:
## LOGGING
logging.post_call(
input=messages,
@@ -1740,7 +1738,7 @@ def completion(
logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
)
if "stream" in optional_params and optional_params["stream"] == True:
if "stream" in optional_params and optional_params["stream"] is True:
# don't try to access stream object,
response = CustomStreamWrapper(
model_response,
@@ -1788,7 +1786,7 @@ def completion(
logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
)
if "stream" in optional_params and optional_params["stream"] == True:
if "stream" in optional_params and optional_params["stream"] is True:
# don't try to access stream object,
response = CustomStreamWrapper(
model_response,
@@ -1836,7 +1834,7 @@ def completion(
logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
)
if "stream" in optional_params and optional_params["stream"] == True:
if "stream" in optional_params and optional_params["stream"] is True:
# don't try to access stream object,
response = CustomStreamWrapper(
model_response,
@@ -1875,7 +1873,7 @@ def completion(
logging_obj=logging,
)
if "stream" in optional_params and optional_params["stream"] == True:
if "stream" in optional_params and optional_params["stream"] is True:
# don't try to access stream object,
response = CustomStreamWrapper(
model_response,
@@ -1916,7 +1914,7 @@ def completion(
)
if (
"stream" in optional_params
and optional_params["stream"] == True
and optional_params["stream"] is True
and acompletion is False
):
# don't try to access stream object,
@@ -1943,7 +1941,7 @@ def completion(
encoding=encoding,
logging_obj=logging,
)
if "stream" in optional_params and optional_params["stream"] == True:
if "stream" in optional_params and optional_params["stream"] is True:
# don't try to access stream object,
response = CustomStreamWrapper(
model_response,
@@ -2095,7 +2093,7 @@ def completion(
logging_obj=logging,
)
# fake palm streaming
if "stream" in optional_params and optional_params["stream"] == True:
if "stream" in optional_params and optional_params["stream"] is True:
# fake streaming for palm
resp_string = model_response["choices"][0]["message"]["content"]
response = CustomStreamWrapper(
@@ -2390,7 +2388,7 @@ def completion(
logging_obj=logging,
)
if "stream" in optional_params and optional_params["stream"] == True:
if "stream" in optional_params and optional_params["stream"] is True:
# don't try to access stream object,
response = CustomStreamWrapper(
model_response,
@@ -2527,7 +2525,7 @@ def completion(
)
if (
"stream" in optional_params
and optional_params["stream"] == True
and optional_params["stream"] is True
and not isinstance(response, CustomStreamWrapper)
):
# don't try to access stream object,
@@ -2563,7 +2561,7 @@ def completion(
)
if (
"stream" in optional_params and optional_params["stream"] == True
"stream" in optional_params and optional_params["stream"] is True
): ## [BETA]
# don't try to access stream object,
response = CustomStreamWrapper(
@@ -2587,38 +2585,38 @@ def completion(
if model in custom_prompt_dict:
# check if the model has a registered custom prompt
model_prompt_details = custom_prompt_dict[model]
prompt = custom_prompt(
ollama_prompt = custom_prompt(
role_dict=model_prompt_details["roles"],
initial_prompt_value=model_prompt_details["initial_prompt_value"],
final_prompt_value=model_prompt_details["final_prompt_value"],
messages=messages,
)
else:
prompt = prompt_factory(
model=model,
messages=messages,
custom_llm_provider=custom_llm_provider,
)
if isinstance(prompt, dict):
modified_prompt = ollama_pt(model=model, messages=messages)
if isinstance(modified_prompt, dict):
# for multimode models - ollama/llava prompt_factory returns a dict {
# "prompt": prompt,
# "images": images
# }
prompt, images = prompt["prompt"], prompt["images"]
ollama_prompt, images = (
modified_prompt["prompt"],
modified_prompt["images"],
)
optional_params["images"] = images
else:
ollama_prompt = modified_prompt
## LOGGING
generator = ollama.get_ollama_response(
api_base=api_base,
model=model,
prompt=prompt,
prompt=ollama_prompt,
optional_params=optional_params,
logging_obj=logging,
acompletion=acompletion,
model_response=model_response,
encoding=encoding,
)
if acompletion is True or optional_params.get("stream", False) == True:
if acompletion is True or optional_params.get("stream", False) is True:
return generator
response = generator
@@ -2701,7 +2699,7 @@ def completion(
api_key=api_key,
logging_obj=logging,
)
if "stream" in optional_params and optional_params["stream"] == True:
if "stream" in optional_params and optional_params["stream"] is True:
# don't try to access stream object,
response = CustomStreamWrapper(
response,
@@ -2710,7 +2708,7 @@ def completion(
logging_obj=logging,
)
if optional_params.get("stream", False) or acompletion == True:
if optional_params.get("stream", False) or acompletion is True:
## LOGGING
logging.post_call(
input=messages,
@@ -2743,7 +2741,7 @@ def completion(
logging_obj=logging,
)
if inspect.isgenerator(model_response) or (
"stream" in optional_params and optional_params["stream"] == True
"stream" in optional_params and optional_params["stream"] is True
):
# don't try to access stream object,
response = CustomStreamWrapper(
@@ -2771,7 +2769,7 @@ def completion(
encoding=encoding,
logging_obj=logging,
)
if stream == True: ## [BETA]
if stream is True: ## [BETA]
# Fake streaming for petals
resp_string = model_response["choices"][0]["message"]["content"]
response = CustomStreamWrapper(
@@ -2786,7 +2784,7 @@ def completion(
import requests
url = litellm.api_base or api_base or ""
if url == None or url == "":
if url is None or url == "":
raise ValueError(
"api_base not set. Set api_base or litellm.api_base for custom endpoints"
)
@@ -3145,10 +3143,10 @@ def batch_completion_models(*args, **kwargs):
try:
result = future.result()
return result
except Exception as e:
except Exception:
# if model 1 fails, continue with response from model 2, model3
print_verbose(
f"\n\ngot an exception, ignoring, removing from futures"
"\n\ngot an exception, ignoring, removing from futures"
)
print_verbose(futures)
new_futures = {}
@@ -3189,9 +3187,6 @@ def batch_completion_models_all_responses(*args, **kwargs):
import concurrent.futures
# ANSI escape codes for colored output
GREEN = "\033[92m"
RED = "\033[91m"
RESET = "\033[0m"
if "model" in kwargs:
kwargs.pop("model")
@@ -3520,7 +3515,7 @@ def embedding(
if api_base is None:
raise ValueError(
f"No API Base provided for Azure OpenAI LLM provider. Set 'AZURE_API_BASE' in .env"
"No API Base provided for Azure OpenAI LLM provider. Set 'AZURE_API_BASE' in .env"
)
## EMBEDDING CALL
@@ -4106,7 +4101,6 @@ def text_completion(
*args,
**kwargs,
):
global print_verbose
import copy
"""
@@ -4136,7 +4130,7 @@ def text_completion(
Your example of how to use this function goes here.
"""
if "engine" in kwargs:
if model == None:
if model is None:
# only use engine when model not passed
model = kwargs["engine"]
kwargs.pop("engine")
@@ -4189,18 +4183,18 @@ def text_completion(
if custom_llm_provider == "huggingface":
# if echo == True, for TGI llms we need to set top_n_tokens to 3
if echo == True:
if echo is True:
# for tgi llms
if "top_n_tokens" not in kwargs:
kwargs["top_n_tokens"] = 3
# processing prompt - users can pass raw tokens to OpenAI Completion()
if type(prompt) == list:
if isinstance(prompt, list):
import concurrent.futures
tokenizer = tiktoken.encoding_for_model("text-davinci-003")
## if it's a 2d list - each element in the list is a text_completion() request
if len(prompt) > 0 and type(prompt[0]) == list:
if len(prompt) > 0 and isinstance(prompt[0], list):
responses = [None for x in prompt] # init responses
def process_prompt(i, individual_prompt):
@@ -4299,7 +4293,7 @@ def text_completion(
raw_response = response._hidden_params.get("original_response", None)
transformed_logprobs = litellm.utils.transform_logprobs(raw_response)
except Exception as e:
print_verbose(f"LiteLLM non blocking exception: {e}")
verbose_logger.exception(f"LiteLLM non blocking exception: {e}")
if isinstance(response, TextCompletionResponse):
return response
@@ -4813,12 +4807,12 @@ def transcription(
Allows router to load balance between them
"""
atranscription = kwargs.get("atranscription", False)
litellm_call_id = kwargs.get("litellm_call_id", None)
logger_fn = kwargs.get("logger_fn", None)
proxy_server_request = kwargs.get("proxy_server_request", None)
model_info = kwargs.get("model_info", None)
metadata = kwargs.get("metadata", {})
tags = kwargs.pop("tags", [])
kwargs.get("litellm_call_id", None)
kwargs.get("logger_fn", None)
kwargs.get("proxy_server_request", None)
kwargs.get("model_info", None)
kwargs.get("metadata", {})
kwargs.pop("tags", [])
drop_params = kwargs.get("drop_params", None)
client: Optional[
@@ -4996,7 +4990,7 @@ def speech(
model_info = kwargs.get("model_info", None)
metadata = kwargs.get("metadata", {})
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore
tags = kwargs.pop("tags", [])
kwargs.pop("tags", [])
optional_params = {}
if response_format is not None:
@@ -5345,12 +5339,12 @@ def print_verbose(print_statement):
verbose_logger.debug(print_statement)
if litellm.set_verbose:
print(print_statement) # noqa
except:
except Exception:
pass
def config_completion(**kwargs):
if litellm.config_path != None:
if litellm.config_path is not None:
config_args = read_config_args(litellm.config_path)
# overwrite any args passed in with config args
return completion(**kwargs, **config_args)
@@ -5408,16 +5402,18 @@ def stream_chunk_builder_text_completion(chunks: list, messages: Optional[List]
response["choices"][0]["text"] = combined_content
if len(combined_content) > 0:
completion_output = combined_content
pass
else:
completion_output = ""
pass
# # Update usage information if needed
try:
response["usage"]["prompt_tokens"] = token_counter(
model=model, messages=messages
)
except: # don't allow this failing to block a complete streaming response from being returned
print_verbose(f"token_counter failed, assuming prompt tokens is 0")
except (
Exception
): # don't allow this failing to block a complete streaming response from being returned
print_verbose("token_counter failed, assuming prompt tokens is 0")
response["usage"]["prompt_tokens"] = 0
response["usage"]["completion_tokens"] = token_counter(
model=model,