forked from phoenix-oss/llama-stack-mirror
Fix meta-reference GPU implementation for inference
This commit is contained in:
parent
f4b0f2af8b
commit
23f1980f9c
2 changed files with 2 additions and 2 deletions
|
@@ -357,8 +357,8 @@ class ModelParallelProcessGroup:
         assert not self.running, "inference already running"

         self.running = True
-        self.request_socket.send(encode_msg(TaskRequest(task=req)))
         try:
+            self.request_socket.send(encode_msg(TaskRequest(task=req)))
             while True:
                 obj_json = self.request_socket.recv()
                 obj = parse_message(obj_json)
|
|
Loading…
Add table
Add a link
Reference in a new issue