Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00
Merge branch 'main' into main

Commit b22517845e: 98 changed files with 3926 additions and 997 deletions
@@ -12,9 +12,9 @@ from typing import Any, Literal, Union, BinaryIO
from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy

import httpx
import litellm

from ._logging import verbose_logger
from litellm import ( # type: ignore
    client,

@@ -34,9 +34,12 @@ from litellm.utils import (
    async_mock_completion_streaming_obj,
    convert_to_model_response_object,
    token_counter,
+    create_pretrained_tokenizer,
+    create_tokenizer,
    Usage,
    get_optional_params_embeddings,
    get_optional_params_image_gen,
+    supports_httpx_timeout,
)
from .llms import (
    anthropic_text,

@@ -75,6 +78,7 @@ from .llms.prompt_templates.factory import (
    prompt_factory,
    custom_prompt,
    function_call_prompt,
+    map_system_message_pt,
)
import tiktoken
from concurrent.futures import ThreadPoolExecutor

@@ -448,7 +452,7 @@ def completion(
    model: str,
    # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
    messages: List = [],
-    timeout: Optional[Union[float, int]] = None,
+    timeout: Optional[Union[float, str, httpx.Timeout]] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    n: Optional[int] = None,

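These hunks appear to come from litellm/main.py. With the widened annotation, `completion()` accepts a granular `httpx.Timeout` in addition to a plain number. A minimal usage sketch, assuming a placeholder model name and prompt:

```python
import httpx
import litellm

# Granular timeouts: 5s to connect, up to 120s to read the response.
# Providers whose code paths cannot accept an httpx.Timeout fall back to the
# read value (see the timeout-normalization hunk further down).
response = litellm.completion(
    model="gpt-3.5-turbo",  # placeholder model
    messages=[{"role": "user", "content": "Hello"}],
    timeout=httpx.Timeout(connect=5.0, read=120.0, write=10.0, pool=10.0),
)
```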
@@ -551,6 +555,7 @@ def completion(
    eos_token = kwargs.get("eos_token", None)
    preset_cache_key = kwargs.get("preset_cache_key", None)
    hf_model_name = kwargs.get("hf_model_name", None)
+    supports_system_message = kwargs.get("supports_system_message", None)
    ### TEXT COMPLETION CALLS ###
    text_completion = kwargs.get("text_completion", False)
    atext_completion = kwargs.get("atext_completion", False)

@@ -616,6 +621,7 @@ def completion(
        "model_list",
        "num_retries",
        "context_window_fallback_dict",
+        "retry_policy",
        "roles",
        "final_prompt_value",
        "bos_token",

@@ -641,16 +647,27 @@ def completion(
        "no-log",
        "base_model",
        "stream_timeout",
+        "supports_system_message",
    ]
    default_params = openai_params + litellm_params
    non_default_params = {
        k: v for k, v in kwargs.items() if k not in default_params
    } # model-specific params - pass them straight to the model/provider
-    if timeout is None:
-        timeout = (
-            kwargs.get("request_timeout", None) or 600
-        ) # set timeout for 10 minutes by default
-    timeout = float(timeout)
+
+    ### TIMEOUT LOGIC ###
+    timeout = timeout or kwargs.get("request_timeout", 600) or 600
+    # set timeout for 10 minutes by default
+
+    if (
+        timeout is not None
+        and isinstance(timeout, httpx.Timeout)
+        and supports_httpx_timeout(custom_llm_provider) == False
+    ):
+        read_timeout = timeout.read or 600
+        timeout = read_timeout # default 10 min timeout
+    elif timeout is not None and not isinstance(timeout, httpx.Timeout):
+        timeout = float(timeout) # type: ignore
+
    try:
        if base_url is not None:
            api_base = base_url

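The replacement block above normalizes whatever the caller passed: default to 600 seconds, keep an `httpx.Timeout` only when the provider path can consume it, otherwise collapse it to a float. Below is a standalone sketch of the same decision logic; `normalize_timeout` and `provider_supports_httpx_timeout` are hypothetical names, where the real code calls `supports_httpx_timeout(custom_llm_provider)` inline.

```python
from typing import Optional, Union

import httpx


def normalize_timeout(
    timeout: Optional[Union[float, str, httpx.Timeout]],
    provider_supports_httpx_timeout: bool,
) -> Union[float, httpx.Timeout]:
    """Sketch of the diff's timeout handling (not the library's actual helper)."""
    timeout = timeout or 600  # set timeout for 10 minutes by default
    if isinstance(timeout, httpx.Timeout) and not provider_supports_httpx_timeout:
        return timeout.read or 600  # fall back to the read timeout
    if not isinstance(timeout, httpx.Timeout):
        return float(timeout)  # "30" or 30 both become 30.0
    return timeout


# e.g. normalize_timeout(httpx.Timeout(connect=5, read=30, write=30, pool=30), False) -> 30
```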
@@ -745,6 +762,13 @@ def completion(
                custom_prompt_dict[model]["bos_token"] = bos_token
            if eos_token:
                custom_prompt_dict[model]["eos_token"] = eos_token
+
+        if (
+            supports_system_message is not None
+            and isinstance(supports_system_message, bool)
+            and supports_system_message == False
+        ):
+            messages = map_system_message_pt(messages=messages)
        model_api_key = get_api_key(
            llm_provider=custom_llm_provider, dynamic_api_key=api_key
        ) # get the api key from the environment if required for the model

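The new `supports_system_message` branch rewrites the message list for models that cannot take a system role, using the `map_system_message_pt` import added above. The sketch below is a hypothetical stand-in for that helper, assuming the common approach of demoting system content to a user turn; the real helper's behavior may differ in detail.

```python
def demote_system_messages(messages: list) -> list:
    """Hypothetical stand-in for map_system_message_pt: rewrite system messages
    as user messages so providers without system-role support still see them."""
    mapped = []
    for m in messages:
        if m.get("role") == "system":
            mapped.append({"role": "user", "content": m.get("content", "")})
        else:
            mapped.append(m)
    return mapped


# Callers opt in per request via the kwarg read earlier in the diff:
# litellm.completion(model="...", messages=msgs, supports_system_message=False)
```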
@@ -871,7 +895,7 @@ def completion(
                logger_fn=logger_fn,
                logging_obj=logging,
                acompletion=acompletion,
-                timeout=timeout,
+                timeout=timeout, # type: ignore
                client=client, # pass AsyncAzureOpenAI, AzureOpenAI client
            )

@@ -1012,7 +1036,7 @@ def completion(
                optional_params=optional_params,
                litellm_params=litellm_params,
                logger_fn=logger_fn,
-                timeout=timeout,
+                timeout=timeout, # type: ignore
                custom_prompt_dict=custom_prompt_dict,
                client=client, # pass AsyncOpenAI, OpenAI client
                organization=organization,

@@ -1097,7 +1121,7 @@ def completion(
                optional_params=optional_params,
                litellm_params=litellm_params,
                logger_fn=logger_fn,
-                timeout=timeout,
+                timeout=timeout, # type: ignore
            )

        if (

@@ -1471,7 +1495,7 @@ def completion(
                acompletion=acompletion,
                logging_obj=logging,
                custom_prompt_dict=custom_prompt_dict,
-                timeout=timeout,
+                timeout=timeout, # type: ignore
            )
        if (
            "stream" in optional_params

@@ -1564,7 +1588,7 @@ def completion(
                logger_fn=logger_fn,
                logging_obj=logging,
                acompletion=acompletion,
-                timeout=timeout,
+                timeout=timeout, # type: ignore
            )
        ## LOGGING
        logging.post_call(

@@ -1892,7 +1916,7 @@ def completion(
                logger_fn=logger_fn,
                encoding=encoding,
                logging_obj=logging,
-                timeout=timeout,
+                timeout=timeout, # type: ignore
            )
        if (
            "stream" in optional_params

@@ -2273,7 +2297,7 @@ def batch_completion(
    n: Optional[int] = None,
    stream: Optional[bool] = None,
    stop=None,
-    max_tokens: Optional[float] = None,
+    max_tokens: Optional[int] = None,
    presence_penalty: Optional[float] = None,
    frequency_penalty: Optional[float] = None,
    logit_bias: Optional[dict] = None,

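`batch_completion`'s `max_tokens` is now typed as an int, matching what providers expect. A minimal call sketch, assuming the documented batch format of one message list per request (model and prompts are placeholders):

```python
import litellm

# One request per inner message list; results come back in the same order.
responses = litellm.batch_completion(
    model="gpt-3.5-turbo",  # placeholder model
    messages=[
        [{"role": "user", "content": "Summarize the moon landing in one line."}],
        [{"role": "user", "content": "Summarize the internet in one line."}],
    ],
    max_tokens=64,  # now an int, per the type fix above
)
```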
@@ -2666,6 +2690,7 @@ def embedding(
        "model_list",
        "num_retries",
        "context_window_fallback_dict",
+        "retry_policy",
        "roles",
        "final_prompt_value",
        "bos_token",

@@ -3535,6 +3560,7 @@ def image_generation(
        "model_list",
        "num_retries",
        "context_window_fallback_dict",
+        "retry_policy",
        "roles",
        "final_prompt_value",
        "bos_token",
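`"retry_policy"` joins the litellm-internal parameter lists in `embedding()` and `image_generation()` as well, so it is excluded from `non_default_params` and never forwarded to the provider. A small sketch of that filtering step, using made-up, abbreviated parameter lists for illustration:

```python
# Abbreviated stand-ins for the real openai_params / litellm_params lists above.
openai_params = ["temperature", "max_tokens", "stream"]
litellm_params = ["retry_policy", "num_retries", "model_list"]
default_params = openai_params + litellm_params

kwargs = {"temperature": 0.2, "num_retries": 3, "top_k": 40}

# model-specific params - pass them straight to the model/provider
non_default_params = {k: v for k, v in kwargs.items() if k not in default_params}
print(non_default_params)  # {'top_k': 40}: provider-specific extras pass through, litellm controls do not
```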