Merge branch 'main' into main

commit b22517845e (committed by GitHub)
Author: Lucca Zenóbio
Date: 2024-05-06 09:40:23 -03:00
98 changed files with 3926 additions and 997 deletions

litellm/main.py

@@ -12,9 +12,9 @@ from typing import Any, Literal, Union, BinaryIO
from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx
import litellm
from ._logging import verbose_logger
from litellm import ( # type: ignore
client,
@@ -34,9 +34,12 @@ from litellm.utils import (
async_mock_completion_streaming_obj,
convert_to_model_response_object,
token_counter,
create_pretrained_tokenizer,
create_tokenizer,
Usage,
get_optional_params_embeddings,
get_optional_params_image_gen,
supports_httpx_timeout,
)
from .llms import (
anthropic_text,
@@ -75,6 +78,7 @@ from .llms.prompt_templates.factory import (
prompt_factory,
custom_prompt,
function_call_prompt,
map_system_message_pt,
)
import tiktoken
from concurrent.futures import ThreadPoolExecutor
@@ -448,7 +452,7 @@ def completion(
model: str,
# Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
messages: List = [],
timeout: Optional[Union[float, int]] = None,
timeout: Optional[Union[float, str, httpx.Timeout]] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
n: Optional[int] = None,
@@ -551,6 +555,7 @@ def completion(
eos_token = kwargs.get("eos_token", None)
preset_cache_key = kwargs.get("preset_cache_key", None)
hf_model_name = kwargs.get("hf_model_name", None)
supports_system_message = kwargs.get("supports_system_message", None)
### TEXT COMPLETION CALLS ###
text_completion = kwargs.get("text_completion", False)
atext_completion = kwargs.get("atext_completion", False)
@@ -616,6 +621,7 @@ def completion(
"model_list",
"num_retries",
"context_window_fallback_dict",
"retry_policy",
"roles",
"final_prompt_value",
"bos_token",
@@ -641,16 +647,27 @@ def completion(
"no-log",
"base_model",
"stream_timeout",
"supports_system_message",
]
default_params = openai_params + litellm_params
non_default_params = {
k: v for k, v in kwargs.items() if k not in default_params
} # model-specific params - pass them straight to the model/provider
if timeout is None:
timeout = (
kwargs.get("request_timeout", None) or 600
) # set timeout for 10 minutes by default
timeout = float(timeout)
### TIMEOUT LOGIC ###
timeout = timeout or kwargs.get("request_timeout", 600) or 600
# set timeout for 10 minutes by default
if (
timeout is not None
and isinstance(timeout, httpx.Timeout)
and supports_httpx_timeout(custom_llm_provider) == False
):
read_timeout = timeout.read or 600
timeout = read_timeout # default 10 min timeout
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
timeout = float(timeout) # type: ignore
try:
if base_url is not None:
api_base = base_url
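A minimal usage sketch (not part of the commit): per the timeout logic above, completion() now accepts either a numeric timeout or an httpx.Timeout, and for providers where supports_httpx_timeout() is false only the read timeout (default 600s) is forwarded. Model name and messages below are placeholders.

import httpx
import litellm

# Hypothetical call: granular httpx timeout (30s default, 5s connect).
# Providers without httpx-timeout support fall back to timeout.read
# per the branch added above.
response = litellm.completion(
    model="gpt-3.5-turbo",  # placeholder model
    messages=[{"role": "user", "content": "Hello"}],
    timeout=httpx.Timeout(30.0, connect=5.0),
)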
@@ -745,6 +762,13 @@ def completion(
custom_prompt_dict[model]["bos_token"] = bos_token
if eos_token:
custom_prompt_dict[model]["eos_token"] = eos_token
if (
supports_system_message is not None
and isinstance(supports_system_message, bool)
and supports_system_message == False
):
messages = map_system_message_pt(messages=messages)
model_api_key = get_api_key(
llm_provider=custom_llm_provider, dynamic_api_key=api_key
) # get the api key from the environment if required for the model
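A hedged sketch of the new supports_system_message kwarg (placeholder model name): when it is explicitly False, the block above rewrites the messages via map_system_message_pt() before the provider call.

import litellm

# Hypothetical call for a provider/model that rejects the "system" role;
# supports_system_message=False routes messages through map_system_message_pt().
response = litellm.completion(
    model="provider/model-without-system-role",  # placeholder
    messages=[
        {"role": "system", "content": "Answer tersely."},
        {"role": "user", "content": "What does httpx.Timeout control?"},
    ],
    supports_system_message=False,
)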
@@ -871,7 +895,7 @@ def completion(
logger_fn=logger_fn,
logging_obj=logging,
acompletion=acompletion,
timeout=timeout,
timeout=timeout, # type: ignore
client=client, # pass AsyncAzureOpenAI, AzureOpenAI client
)
@@ -1012,7 +1036,7 @@ def completion(
optional_params=optional_params,
litellm_params=litellm_params,
logger_fn=logger_fn,
timeout=timeout,
timeout=timeout, # type: ignore
custom_prompt_dict=custom_prompt_dict,
client=client, # pass AsyncOpenAI, OpenAI client
organization=organization,
@@ -1097,7 +1121,7 @@ def completion(
optional_params=optional_params,
litellm_params=litellm_params,
logger_fn=logger_fn,
timeout=timeout,
timeout=timeout, # type: ignore
)
if (
@@ -1471,7 +1495,7 @@ def completion(
acompletion=acompletion,
logging_obj=logging,
custom_prompt_dict=custom_prompt_dict,
timeout=timeout,
timeout=timeout, # type: ignore
)
if (
"stream" in optional_params
@@ -1564,7 +1588,7 @@ def completion(
logger_fn=logger_fn,
logging_obj=logging,
acompletion=acompletion,
timeout=timeout,
timeout=timeout, # type: ignore
)
## LOGGING
logging.post_call(
@@ -1892,7 +1916,7 @@ def completion(
logger_fn=logger_fn,
encoding=encoding,
logging_obj=logging,
timeout=timeout,
timeout=timeout, # type: ignore
)
if (
"stream" in optional_params
@@ -2273,7 +2297,7 @@ def batch_completion(
n: Optional[int] = None,
stream: Optional[bool] = None,
stop=None,
max_tokens: Optional[float] = None,
max_tokens: Optional[int] = None,
presence_penalty: Optional[float] = None,
frequency_penalty: Optional[float] = None,
logit_bias: Optional[dict] = None,
@@ -2666,6 +2690,7 @@ def embedding(
"model_list",
"num_retries",
"context_window_fallback_dict",
"retry_policy",
"roles",
"final_prompt_value",
"bos_token",
@@ -3535,6 +3560,7 @@ def image_generation(
"model_list",
"num_retries",
"context_window_fallback_dict",
"retry_policy",
"roles",
"final_prompt_value",
"bos_token",