mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
chore(stash rate limit manager changes):
This commit is contained in:
parent
306a38880d
commit
56c87febae
1 changed file with 21 additions and 1 deletion
|
@@ -3864,7 +3864,27 @@ class RateLimitManager():
|
||||||
def __init__(self, max_tokens_per_minute, max_requests_per_minute):
|
def __init__(self, max_tokens_per_minute, max_requests_per_minute):
|
||||||
self.max_tokens_per_minute = max_tokens_per_minute
|
self.max_tokens_per_minute = max_tokens_per_minute
|
||||||
self.max_requests_per_minute = max_requests_per_minute
|
self.max_requests_per_minute = max_requests_per_minute
|
||||||
print("init rate limit handler")
|
# print("init rate limit handler")
|
||||||
|
self.status_tracker = StatusTracker()
|
||||||
|
|
||||||
|
# async def acompletion(self, max_attempts=5, kwargs):
|
||||||
|
# # init request
|
||||||
|
# request = APIRequest(
|
||||||
|
# task_id=next(self.task_id_generator_function()),
|
||||||
|
# request_json=kwargs,
|
||||||
|
# token_consumption=self.num_tokens_consumed_from_request(
|
||||||
|
# request_json, token_encoding_name
|
||||||
|
# ),
|
||||||
|
# attempts_left=max_attempts,
|
||||||
|
# metadata=request_json.pop("metadata", None),
|
||||||
|
# )
|
||||||
|
|
||||||
|
# # check current capacity for model
|
||||||
|
|
||||||
|
# # if under capacity
|
||||||
|
# # check if fallback model specified
|
||||||
|
|
||||||
|
# # if no fallback model specified then wait to process request
|
||||||
|
|
||||||
|
|
||||||
async def batch_completion(
|
async def batch_completion(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue