feat(lowest_latency.py): route by time to first token, for streaming requests (if available)

Closes https://github.com/BerriAI/litellm/issues/3574
Krrish Dholakia 2024-05-21 13:08:17 -07:00
parent 620e6db027
commit 2b3da449c8
3 changed files with 232 additions and 18 deletions
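
The routing change itself lands in lowest_latency.py: for streaming requests, the lowest-latency strategy now prefers the deployment with the smallest recorded time to first token (TTFT) when that measurement is available, and falls back to total request latency otherwise. Below is a minimal sketch of that idea in Python; the pick_deployment helper and the latency_stats shape are hypothetical illustrations, not the actual litellm implementation.

# Hypothetical sketch of TTFT-aware lowest-latency routing (not litellm's actual code).
from typing import Dict, List, Optional


def pick_deployment(
    deployments: List[str],
    latency_stats: Dict[str, Dict[str, Optional[float]]],
    is_streaming: bool,
) -> str:
    """Return the deployment with the lowest latency score.

    latency_stats maps a deployment id to averaged measurements in seconds,
    e.g. {"ttft": 0.42, "total": 1.8}. Streaming requests are scored by
    time to first token when it has been recorded; everything else falls
    back to total request latency.
    """

    def score(deployment: str) -> float:
        stats = latency_stats.get(deployment, {})
        ttft = stats.get("ttft")
        if is_streaming and ttft is not None:
            return ttft  # route by time to first token
        total = stats.get("total")
        return total if total is not None else float("inf")  # unknown deployments go last

    return min(deployments, key=score)


# Example with made-up numbers: the first deployment streams its first token sooner,
# so it wins for streaming traffic even though its total latency is higher.
stats = {
    "deployment-a": {"ttft": 0.35, "total": 2.1},
    "deployment-b": {"ttft": 0.80, "total": 1.5},
}
print(pick_deployment(["deployment-a", "deployment-b"], stats, is_streaming=True))   # deployment-a
print(pick_deployment(["deployment-a", "deployment-b"], stats, is_streaming=False))  # deployment-b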


@@ -38,19 +38,17 @@ class CompletionCustomHandler(
     # Class variables or attributes
     def __init__(self):
         self.errors = []
-        self.states: Optional[
-            List[
-                Literal[
-                    "sync_pre_api_call",
-                    "async_pre_api_call",
-                    "post_api_call",
-                    "sync_stream",
-                    "async_stream",
-                    "sync_success",
-                    "async_success",
-                    "sync_failure",
-                    "async_failure",
-                ]
-            ]
+        self.states: List[
+            Literal[
+                "sync_pre_api_call",
+                "async_pre_api_call",
+                "post_api_call",
+                "sync_stream",
+                "async_stream",
+                "sync_success",
+                "async_success",
+                "sync_failure",
+                "async_failure",
+            ]
         ] = []
@@ -269,6 +267,7 @@ class CompletionCustomHandler(
             assert isinstance(kwargs["litellm_params"]["api_base"], str)
             assert isinstance(kwargs["start_time"], (datetime, type(None)))
             assert isinstance(kwargs["stream"], bool)
+            assert isinstance(kwargs["completion_start_time"], datetime)
             assert kwargs["cache_hit"] is None or isinstance(kwargs["cache_hit"], bool)
             assert isinstance(kwargs["user"], (str, type(None)))
             assert isinstance(kwargs["input"], (list, dict, str))