fix(router.py): remove wrapping of router.completion() let clients handle this

2025-04-26 03:04:13 +00:00 · 2024-01-30 21:11:55 -08:00 · 2024-01-30 21:11:55 -08:00 · a07f3ec2d4
commit a07f3ec2d4
parent 4219fe02d7
2 changed files with 88 additions and 5 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@ -289,11 +289,7 @@ class Router:
            timeout = kwargs.get("request_timeout", self.timeout)
            kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
            kwargs.setdefault("metadata", {}).update({"model_group": model})
-            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+            response = self.function_with_fallbacks(**kwargs)
                # Submit the function to the executor with a timeout
                future = executor.submit(self.function_with_fallbacks, **kwargs)
                response = future.result(timeout=timeout)  # type: ignore
            return response
        except Exception as e:
            raise e
--- a/litellm/tests/test_router_timeout.py
+++ b/litellm/tests/test_router_timeout.py
@ -0,0 +1,87 @@
 #### What this tests ####
 # This tests the router's timeout error handling during fallbacks
 import sys, os, time
 import traceback, asyncio
 import pytest
 sys.path.insert(
    0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 import os
 import litellm
 from litellm import Router
 from dotenv import load_dotenv
 load_dotenv()
 def test_router_timeouts():
    """Send one completion request through a Router configured with a fallback.

    Builds a Router with two deployments ("openai-gpt-4" and
    "anthropic-claude-instant-1.2"), registers the second as the fallback for
    the first, and issues a single ``router.completion`` call with
    ``timeout=5`` and ``num_retries=0``. Token usage reported on the response
    is accumulated and printed.

    NOTE(review): this function makes no assertion about timeout behaviour;
    it passes whenever the completion call returns without raising.
    """
    # Model list for OpenAI and Anthropic models.
    # The "os.environ/..." values are passed through as plain strings here;
    # presumably litellm resolves them from the environment - TODO confirm.
    model_list = [
        {
            "model_name": "openai-gpt-4",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_key": "os.environ/AZURE_API_KEY",
                "api_base": "os.environ/AZURE_API_BASE",
                "api_version": "os.environ/AZURE_API_VERSION",
            },
            "tpm": 80000,
        },
        {
            "model_name": "anthropic-claude-instant-1.2",
            "litellm_params": {
                "model": "claude-instant-1",
                "api_key": "os.environ/ANTHROPIC_API_KEY",
            },
            "tpm": 20000,
        },
    ]
    # Requests for "openai-gpt-4" fall back to the Anthropic deployment.
    fallbacks_list = [
        {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
    ]
    # REDIS_PORT may be unset or empty; int(None) raises TypeError and int("")
    # raises ValueError before the router is even built, so convert only when
    # a value is present. redis_host / redis_password are already passed
    # through as None when unset.
    # NOTE(review): assumes Router accepts redis_port=None - confirm.
    redis_port_env = os.getenv("REDIS_PORT")
    redis_port = int(redis_port_env) if redis_port_env else None
    # Configure router
    router = Router(
        model_list=model_list,
        fallbacks=fallbacks_list,
        routing_strategy="usage-based-routing",
        debug_level="INFO",
        set_verbose=True,
        redis_host=os.getenv("REDIS_HOST"),
        redis_password=os.getenv("REDIS_PASSWORD"),
        redis_port=redis_port,
        timeout=10,
    )
    print("***** TPM SETTINGS *****")
    for model_object in model_list:
        print(f"{model_object['model_name']}: {model_object['tpm']} TPM")
    # Sample list of questions
    questions_list = [
        {"content": "Tell me a very long joke.", "modality": "voice"},
    ]
    total_tokens_used = 0
    # Process each question
    for question in questions_list:
        messages = [{"content": question["content"], "role": "user"}]
        # Prompt size is printed for reference only; it is not used below.
        prompt_tokens = litellm.token_counter(text=question["content"], model="gpt-4")
        print("prompt_tokens = ", prompt_tokens)
        # The per-call timeout (5) differs from the timeout the router was
        # constructed with (10).
        response = router.completion(
            model="openai-gpt-4", messages=messages, timeout=5, num_retries=0
        )
        total_tokens_used += response.usage.total_tokens
        print("Response:", response)
        print("********** TOKENS USED SO FAR = ", total_tokens_used)