forked from phoenix/litellm-mirror
feat(lowest_latency.py): route by time to first token, for streaming requests (if available)
Closes https://github.com/BerriAI/litellm/issues/3574
This commit is contained in:
parent
620e6db027
commit
2b3da449c8
3 changed files with 232 additions and 18 deletions
|
@ -38,19 +38,17 @@ class CompletionCustomHandler(
|
|||
# Class variables or attributes
|
||||
def __init__(self):
|
||||
self.errors = []
|
||||
self.states: Optional[
|
||||
List[
|
||||
Literal[
|
||||
"sync_pre_api_call",
|
||||
"async_pre_api_call",
|
||||
"post_api_call",
|
||||
"sync_stream",
|
||||
"async_stream",
|
||||
"sync_success",
|
||||
"async_success",
|
||||
"sync_failure",
|
||||
"async_failure",
|
||||
]
|
||||
self.states: List[
|
||||
Literal[
|
||||
"sync_pre_api_call",
|
||||
"async_pre_api_call",
|
||||
"post_api_call",
|
||||
"sync_stream",
|
||||
"async_stream",
|
||||
"sync_success",
|
||||
"async_success",
|
||||
"sync_failure",
|
||||
"async_failure",
|
||||
]
|
||||
] = []
|
||||
|
||||
|
@ -269,6 +267,7 @@ class CompletionCustomHandler(
|
|||
assert isinstance(kwargs["litellm_params"]["api_base"], str)
|
||||
assert isinstance(kwargs["start_time"], (datetime, type(None)))
|
||||
assert isinstance(kwargs["stream"], bool)
|
||||
assert isinstance(kwargs["completion_start_time"], datetime)
|
||||
assert kwargs["cache_hit"] is None or isinstance(kwargs["cache_hit"], bool)
|
||||
assert isinstance(kwargs["user"], (str, type(None)))
|
||||
assert isinstance(kwargs["input"], (list, dict, str))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue