diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 3e2e932e1..1884f5ce9 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/timeout.cpython-311.pyc b/litellm/__pycache__/timeout.cpython-311.pyc
index 3817a2602..14a356abc 100644
Binary files a/litellm/__pycache__/timeout.cpython-311.pyc and b/litellm/__pycache__/timeout.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index acbc91552..a01a26e20 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index 6e54d658e..4c3d75bf5 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -36,7 +36,7 @@ async def acompletion(*args, **kwargs):
 @client
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
-@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
+@timeout(600) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
 def completion(
     messages, model="gpt-3.5-turbo",# required params
     # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
@@ -44,7 +44,7 @@ def completion(
     temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
     presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None,
     # Optional liteLLM function params
-    *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False,
+    *, return_async=False, api_key=None, force_timeout=600, azure=False, logger_fn=None, verbose=False,
     hugging_face = False, replicate=False,together_ai = False, custom_llm_provider=None, custom_api_base=None
     ):
   try:
diff --git a/litellm/tests/test_load_test_model.py b/litellm/tests/test_load_test_model.py
index 1ff74d580..373da1a6d 100644
--- a/litellm/tests/test_load_test_model.py
+++ b/litellm/tests/test_load_test_model.py
@@ -4,5 +4,6 @@ sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the
 import litellm
 from litellm import load_test_model
 
-result = load_test_model(model="gpt-3.5-turbo", num_calls=5)
+model="gpt-3.5-turbo"
+result = load_test_model(model=model, num_calls=5)
 print(result)
\ No newline at end of file
diff --git a/litellm/timeout.py b/litellm/timeout.py
index 37bbbffc1..81d99e7de 100644
--- a/litellm/timeout.py
+++ b/litellm/timeout.py
@@ -38,7 +38,7 @@ def timeout(
             thread.start()
             future = asyncio.run_coroutine_threadsafe(async_func(), thread.loop)
             local_timeout_duration = timeout_duration
-            if "force_timeout" in kwargs:
+            if "force_timeout" in kwargs and kwargs["force_timeout"] is not None:
                 local_timeout_duration = kwargs["force_timeout"]
             try:
                 result = future.result(timeout=local_timeout_duration)
diff --git a/litellm/utils.py b/litellm/utils.py
index ca0c6d83e..65cd96a8e 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -302,7 +302,7 @@ def get_optional_params(
         return optional_params
     return optional_params
 
-def load_test_model(model: str, custom_llm_provider: str = None, prompt: str = None, num_calls: int = None):
+def load_test_model(model: str, custom_llm_provider: str = None, custom_api_base: str = None, prompt: str = None, num_calls: int = None, force_timeout: int = None):
     test_prompt = "Hey, how's it going"
     test_calls = 100
     if prompt:
@@ -312,7 +312,7 @@ def load_test_model(model: str, custom_llm_provider: str = None, prompt: str = N
     messages = [[{"role": "user", "content": test_prompt}] for _ in range(test_calls)]
     start_time = time.time()
     try:
-        litellm.batch_completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider)
+        litellm.batch_completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider, custom_api_base = custom_api_base, force_timeout=force_timeout)
         end_time = time.time()
         response_time = end_time - start_time
         return {"total_response_time": response_time, "calls_made": 100, "status": "success", "exception": None}
diff --git a/pyproject.toml b/pyproject.toml
index 78f9d4712..d0094c107 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.382"
+version = "0.1.383"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
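
A minimal usage sketch of the parameters this patch touches, assuming the litellm version produced here (0.1.383); the model name and API base below are illustrative placeholders, not values taken from the change:

from litellm import load_test_model, completion

# load_test_model now accepts custom_api_base and force_timeout
# (added in litellm/utils.py above).
result = load_test_model(
    model="gpt-3.5-turbo",                                # placeholder model
    custom_api_base="https://example-proxy.internal/v1",  # placeholder endpoint
    num_calls=5,
    force_timeout=120,  # per-call cap in seconds, forwarded to batch_completion
)
print(result)

# completion() still takes force_timeout to override the new 600s @timeout default.
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going"}],
    force_timeout=30,
)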