example inference router run config

This commit is contained in:
Xi Yan 2024-09-19 22:22:00 -07:00
parent bce79617bf
commit e2c7a3cea9
2 changed files with 75 additions and 10 deletions

View file

@ -101,16 +101,17 @@ async def run_main(host: str, port: int, stream: bool):
async for log in EventLogger().log(iterator):
log.print()
cprint(f"User>{message.content}", "green")
iterator = client.chat_completion(
ChatCompletionRequest(
model="Meta-Llama3.1-8B",
messages=[message],
stream=stream,
)
)
async for log in EventLogger().log(iterator):
log.print()
# For testing models routing
# cprint(f"User>{message.content}", "green")
# iterator = client.chat_completion(
# ChatCompletionRequest(
# model="Meta-Llama3.1-8B",
# messages=[message],
# stream=stream,
# )
# )
# async for log in EventLogger().log(iterator):
# log.print()
def main(host: str, port: int, stream: bool = True):