mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
feat(scheduler.py): add request prioritization scheduler
allow user to set priority for a request
This commit is contained in:
parent
aada7b4bd3
commit
79287a7584
8 changed files with 394 additions and 123 deletions
|
@ -103,6 +103,36 @@ async def chat_completion(session, key, model: Union[str, List] = "gpt-4"):
|
|||
return await response.json()
|
||||
|
||||
|
||||
async def queue_chat_completion(
    session, key, priority: int, model: Union[str, List] = "gpt-4"
):
    """Send a chat completion request with a priority to the queue endpoint.

    POSTs a fixed two-message conversation to
    ``/queue/chat/completions`` on the local proxy, prints the response
    body, and returns the response's raw headers.

    Args:
        session: HTTP client session whose ``post(...)`` returns an async
            context manager yielding the response (presumably an
            ``aiohttp.ClientSession`` -- confirm against callers).
        key: API key, sent as a ``Bearer`` token in the Authorization header.
        priority: Value forwarded in the request body's ``"priority"`` field.
        model: Model name (or list of names) forwarded in the ``"model"``
            field of the request body.

    Returns:
        The ``raw_headers`` attribute of the response.

    Raises:
        Exception: If the response status code is not 200.
    """
    url = "http://0.0.0.0:4000/queue/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ],
        "priority": priority,
    }

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        # Echo the body so a failing run shows what the proxy answered.
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")

        return response.raw_headers
|
||||
|
||||
|
||||
async def chat_completion_with_headers(session, key, model="gpt-4"):
|
||||
url = "http://0.0.0.0:4000/chat/completions"
|
||||
headers = {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue