feat(scheduler.py): add request prioritization scheduler

allow user to set priority for a request
This commit is contained in:
Krrish Dholakia 2024-05-31 18:51:13 -07:00
parent aada7b4bd3
commit 79287a7584
8 changed files with 394 additions and 123 deletions

View file

@ -103,6 +103,36 @@ async def chat_completion(session, key, model: Union[str, List] = "gpt-4"):
return await response.json()
async def queue_chat_completion(
    session, key, priority: int, model: Union[str, List] = "gpt-4"
):
    """POST a prioritized chat completion to the proxy's queue endpoint.

    Sends a fixed two-message conversation together with ``priority`` to
    ``/queue/chat/completions``, prints the response body followed by a
    blank line, and returns the response's raw headers.

    Args:
        session: HTTP client session exposing ``post(...)`` as an async
            context manager (presumably an ``aiohttp.ClientSession`` --
            confirm against the callers).
        key: API key sent as a Bearer token in the Authorization header.
        priority: Value forwarded verbatim in the request's ``priority``
            field.
        model: Model name, or list of names, forwarded in the ``model``
            field.

    Returns:
        ``response.raw_headers`` of the 200 response.

    Raises:
        Exception: If the response status code is not 200.
    """
    endpoint = "http://0.0.0.0:4000/queue/chat/completions"
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    payload = {"model": model, "messages": conversation, "priority": priority}
    request_headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    async with session.post(
        endpoint, headers=request_headers, json=payload
    ) as response:
        # Capture the status before consuming the body, then echo the body
        # so a failing run shows what the proxy actually returned.
        status = response.status
        body_text = await response.text()
        print(body_text)
        print()

        if status == 200:
            return response.raw_headers
        raise Exception(f"Request did not return a 200 status code: {status}")
async def chat_completion_with_headers(session, key, model="gpt-4"):
url = "http://0.0.0.0:4000/chat/completions"
headers = {